# Shared imports assumed by the snippets below. The project-specific helpers
# (load_*, AdaBoost, OptimalWeakLearner, RandomChooseLeaner, shuffle,
# train_test_shuffle_split, k_fold_cross_validation, roc) come from the
# surrounding repo; the code is Python 2 (print statements, list-returning
# map and range, integer division).
import random

import numpy as np
import matplotlib.pyplot as plt


    def train(self,
              train,
              train_target,
              test,
              test_target,
              T=100,
              percentage=0.5):
        k, n = self.selected_code.shape

        train = train[:int(len(train) * percentage)]
        train_target = train_target[:int(len(train_target) * percentage)]
        first_time = True
        predictors = None
        for f in range(n):
            print "Run Adaboost on function %f" % f
            codes = self.selected_code[:, f]
            labels = self.convert_to_binary(train_target, codes)
            test_labels = self.convert_to_binary(test_target, codes)
            learner = OptimalWeakLearner()
            if not first_time:
                learner.set_predictors(predictors)
            adaboost = AdaBoost(learner)
            adaboost.boost(train,
                           labels,
                           test,
                           test_labels,
                           T,
                           calculate_auc=False)
            self.functions.append(adaboost)
            if first_time:
                first_time = False
                predictors = learner.get_predictors()
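
    # `convert_to_binary` (used above) is defined elsewhere in the class. A
    # minimal sketch, assuming `codes` is one ECOC code column holding a 0/1
    # bit per class and `target` holds integer class ids:
    def convert_to_binary(self, target, codes):
        # class id -> +1.0 / -1.0 according to that class's code bit
        return np.array([1.0 if codes[int(y)] == 1 else -1.0
                         for y in target])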
def entire():
    data, target = load_crx()
    train, test, train_target, test_target = train_test_shuffle_split(data, target, len(data) / 10)
    train_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, train_target))
    test_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, test_target))

    adaboost = AdaBoost(OptimalWeakLearner())
    adaboost.boost(train, train_target, test, test_target, discrete_features=range(train.shape[1]))
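
# `train_test_shuffle_split` is a repo utility. A minimal sketch of the
# behaviour assumed here -- shuffle data and target together, then hold out
# the first `test_size` rows as the test split:
def train_test_shuffle_split(data, target, test_size):
    indices = np.random.permutation(len(data))
    data, target = data[indices], target[indices]
    return (data[test_size:], data[:test_size],
            target[test_size:], target[:test_size])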
def random_weak_learner():
    print '==============Random Weak Learner============'
    train, target = load_spambase()
    train, test, train_target, test_target = train_test_shuffle_split(train, target, len(train) / 10)
    train_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, train_target))
    test_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, test_target))
    adaboost = AdaBoost(RandomChooseLeaner())
    adaboost.boost(train, train_target, test, test_target, T=200)
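
# `RandomChooseLeaner` (spelling as in the repo) is assumed to be a decision
# stump that draws its feature and threshold at random instead of minimizing
# weighted error; `OptimalWeakLearner` presumably shares the fit/predict
# interface, plus the get_predictors/set_predictors cache used in the ECOC
# snippet above. A minimal sketch of the random stump:
class RandomChooseLeaner(object):
    def fit(self, X, y, weights):
        self.f = random.randrange(X.shape[1])            # random feature
        self.t = random.choice(np.unique(X[:, self.f]))  # random threshold
        return self

    def predict(self, X):
        return np.where(X[:, self.f] > self.t, 1.0, -1.0)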
def entire():
    data, target = load_vote()
    train, test, train_target, test_target = train_test_shuffle_split(
        data, target,
        len(data) / 10)
    train_target = np.array(
        map(lambda v: -1.0 if v == 0 else 1.0, train_target))
    test_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, test_target))

    adaboost = AdaBoost(OptimalWeakLearner())
    adaboost.boost(train,
                   train_target,
                   test,
                   test_target,
                   discrete_features=range(train.shape[1]))
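
# The load_* helpers are repo utilities returning (data, target) numpy
# arrays. A sketch for spambase, whose UCI file is comma-separated with the
# 0/1 label in the last column (crx and vote also need their categorical
# attributes encoded as numbers first):
def load_spambase():
    raw = np.loadtxt('spambase.data', delimiter=',')
    return raw[:, :-1], raw[:, -1]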
def cross():
    train, target = load_vote()
    target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, target))

    k = 10
    train_size = len(train)
    test_index_generator = k_fold_cross_validation(train_size, k)
    fold = 1
    overall_acc = 0
    overall_error = 0
    overall_auc = 0

    for start, end in test_index_generator:
        print "====================Fold %s============" % fold
        k_fold_train = np.vstack(
            (train[range(0, start)], train[range(end, train_size)]))
        test = train[range(start, end)]
        train_target = np.append(target[range(0, start)],
                                 target[range(end, train_size)])
        test_target = target[range(start, end)]

        adaboost = AdaBoost(OptimalWeakLearner())
        acc, err, auc = adaboost.boost(k_fold_train, train_target, test,
                                       test_target)

        overall_auc += auc
        overall_acc += acc
        overall_error += err
        fold += 1

    print "Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % (
        overall_acc / k, overall_error / k, overall_auc / k)
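
# `k_fold_cross_validation` is assumed, from the loop above, to yield one
# (start, end) pair per fold marking a contiguous test slice. A minimal
# sketch:
def k_fold_cross_validation(size, k):
    fold_size = size // k
    for i in range(k):
        start = i * fold_size
        end = size if i == k - 1 else start + fold_size
        yield start, end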
def cross():
    train, target = load_crx()
    train, target = shuffle(train, target)
    target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, target))

    k = 10
    train_size = len(train)
    test_index_generator = k_fold_cross_validation(train_size, k)
    fold = 1
    overall_acc = 0
    overall_error = 0
    overall_auc = 0

    for start, end in test_index_generator:
        print "====================Fold %s============" % fold
        k_fold_train = np.vstack((train[range(0, start)], train[range(end, train_size)]))
        test = train[range(start, end)]
        train_target = np.append(target[range(0, start)], target[range(end, train_size)])
        test_target = target[range(start, end)]

        adaboost = AdaBoost(OptimalWeakLearner())
        acc, err, auc = adaboost.boost(k_fold_train, train_target, test, test_target)

        overall_auc += auc
        overall_acc += acc
        overall_error += err
        fold += 1

    print "Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % (
        overall_acc / k, overall_error / k, overall_auc / k)
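
# `shuffle` above is assumed to permute data and target with one shared
# permutation (the same contract as sklearn.utils.shuffle):
def shuffle(data, target):
    indices = np.random.permutation(len(data))
    return data[indices], target[indices]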
def optimal_weak_learner_on_random_data():
    data, target = load_spambase()
    train, test, train_target, test_target = train_test_shuffle_split(
        data, target,
        len(data) / 10)
    train_target = np.array(
        map(lambda v: -1.0 if v == 0 else 1.0, train_target))
    test_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, test_target))

    indices = range(len(train))
    param = 0.05
    res = []
    while param < 0.5:
        print "Choose %.2f%% of data" % (param * 100)
        choose_size = int(len(indices) * param)
        choose_indices = random.sample(indices, choose_size)

        X = train[choose_indices]
        Y = train_target[choose_indices]

        adaboost = AdaBoost(OptimalWeakLearner())
        acc, err, auc = adaboost.boost(X, Y, test, test_target)
        res.append((acc, err, auc))
        param += 0.05

    print res
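
# The AdaBoost class itself lives elsewhere in the repo; besides the loop
# below, its boost() also takes the test split, tracks per-round metrics and
# returns (acc, err, auc). A minimal sketch of the core boosting loop, under
# the stump interface assumed above:
import copy

class AdaBoost(object):
    def __init__(self, learner):
        self.learner = learner
        self.alphas = []
        self.stumps = []

    def boost(self, X, y, T=100):
        w = np.ones(len(X)) / len(X)                  # uniform start weights
        for t in range(T):
            stump = copy.deepcopy(self.learner).fit(X, y, w)
            pred = stump.predict(X)
            eps = w[pred != y].sum()                  # weighted round error
            alpha = 0.5 * np.log((1.0 - eps) / max(eps, 1e-10))
            w *= np.exp(-alpha * y * pred)            # re-weight examples
            w /= w.sum()
            self.alphas.append(alpha)
            self.stumps.append(stump)

    def hypothesis(self, X):
        # real-valued score sum_t alpha_t * h_t(x); its sign is the label
        return sum(a * s.predict(X)
                   for a, s in zip(self.alphas, self.stumps))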
    def active_learning(self, train, train_target, test, test_target):
        param = 0.05
        increment = 0.05
        init_size = int(len(train) * param)
        increment_size = int(len(train) * increment)

        X = train[:init_size]
        Y = train_target[:init_size]
        R = train[init_size:]
        RY = train_target[init_size:]

        while param < 0.5:
            print "labeled data: %.2f%%" % (100.0 * len(X) / len(train))
            adaboost = AdaBoost(OptimalWeakLearner())
            acc, err, auc = adaboost.boost(X, Y, test, test_target)
            self.result.append((acc, err, auc))
            H = adaboost.hypothesis(R)
            H_abs = np.abs(H)
            sorted_indices = H_abs.argsort().tolist()
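            # smallest |H| = smallest boosted margin: uncertainty sampling,
            # i.e. query labels where the ensemble is least confident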
            selected = sorted_indices[:increment_size]
            remained = sorted_indices[increment_size:]

            X = np.vstack((X, R[selected]))
            # Y = np.append(Y, adaboost.sign(H[selected]))
            Y = np.append(Y, RY[selected])
            R = R[remained]
            RY = RY[remained]
            param += increment
def random_c():
    data, target = load_crx()
    train, test, train_target, test_target = train_test_shuffle_split(data, target, len(data) / 10)
    train_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, train_target))
    test_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, test_target))

    indices = range(len(train))
    param = 0.05
    res = []
    while param < 0.5:
        print "Choose %.2f%% of data" % (param * 100)
        choose_size = int(len(indices) * param)
        choose_indices = random.sample(indices, choose_size)

        X = train[choose_indices]
        Y = train_target[choose_indices]

        adaboost = AdaBoost(OptimalWeakLearner())
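        # columns 0, 3, 4, 5, 6, 8, 9, 11, 12 are the categorical
        # attributes (A1, A4-A7, A9, A10, A12, A13) of the UCI crx dataset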
        acc, err, auc = adaboost.boost(X, Y, test, test_target, discrete_features=[0, 3, 4, 5, 6, 8, 9, 11, 12])
        res.append((acc, err, auc))
        param += 0.05

    print res
def optimal_weak_learner():
    print '==============Optimal Weak Learner============'
    train, target = load_spambase()
    train, target = shuffle(train, target)
    target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, target))

    k = 10
    train_size = len(train)
    test_index_generator = k_fold_cross_validation(train_size, k)
    fold = 1
    overall_acc = 0
    overall_error = 0
    overall_auc = 0

    for start, end in test_index_generator:
        print "====================Fold %s============" % fold
        k_fold_train = np.vstack(
            (train[range(0, start)], train[range(end, train_size)]))
        test = train[range(start, end)]
        train_target = np.append(target[range(0, start)],
                                 target[range(end, train_size)])
        test_target = target[range(start, end)]

        adaboost = AdaBoost(OptimalWeakLearner())
        plot = fold == 1
        acc, err, auc = adaboost.boost(k_fold_train,
                                       train_target,
                                       test,
                                       test_target,
                                       plot=plot)

        if plot:
            test_err_points = np.array(adaboost.test_err_array)
            train_err_points = np.array(adaboost.train_err_array)
            auc_points = np.array(adaboost.test_auc_array)
            round_err_points = np.array(adaboost.weighted_err_array)
            plt.xlabel('Round')
            plt.ylabel('Error Rate')
            plt.plot(test_err_points[:, 0],
                     test_err_points[:, 1],
                     c='r',
                     label='Test Error')
            plt.plot(train_err_points[:, 0],
                     train_err_points[:, 1],
                     c='g',
                     label='Train Error')
            plt.plot(round_err_points[:, 0],
                     round_err_points[:, 1],
                     c='b',
                     label='Round Error')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                       loc=3,
                       ncol=2,
                       mode="expand",
                       borderaxespad=0.)
            plt.show()

            plt.xlabel('Round')
            plt.ylabel('AUC')
            plt.plot(auc_points[:, 0],
                     auc_points[:, 1],
                     c='r',
                     label='AUC')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                       loc=3,
                       ncol=2,
                       mode="expand",
                       borderaxespad=0.)
            plt.show()

        overall_auc += auc
        overall_acc += acc
        overall_error += err

        if fold == 1:
            hypo = adaboost.hypothesis(test)
            roc_points = roc(test_target, hypo, 1.0, -1.0)
            plt.xlabel('FPR')
            plt.ylabel('TPR')
            plt.xlim(xmin=0)
            plt.ylim(ymin=0)
            plt.scatter(roc_points[:, 1], roc_points[:, 0])
            plt.show()
        fold += 1

    print "Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % (
        overall_acc / k, overall_error / k, overall_auc / k)
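
# `roc` is a repo utility; given that the caller above plots points[:, 1] on
# the FPR axis and points[:, 0] on the TPR axis, a minimal sketch that sweeps
# a threshold over the real-valued hypothesis:
def roc(target, scores, pos, neg):
    points = []
    for threshold in np.sort(scores)[::-1]:
        pred = np.where(scores >= threshold, pos, neg)
        tpr = np.mean(pred[target == pos] == pos)  # true positive rate
        fpr = np.mean(pred[target == neg] == pos)  # false positive rate
        points.append((tpr, fpr))
    return np.array(points)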
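# The standalone script below repeats the active-learning experiment at
# module level, but its preamble is missing from the snippet. An assumed
# reconstruction, mirroring the constants of active_learning above (train,
# train_target, test and test_target are taken from an earlier split):
percentage = 0.05
increment = 0.05
init_size = int(len(train) * percentage)
increment_size = int(len(train) * increment)
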
init_dataset = train[:init_size]
init_target = train_target[:init_size]

remain_dataset = train[init_size:]
remain_target = train_target[init_size:]

# Active learning
X = init_dataset
Y = init_target
R = remain_dataset
RY = remain_target
result = []
while percentage < 0.5:
    print "labeled data: %.2f%%" % (100.0 * len(X) / len(train))
    adaboost = AdaBoost(OptimalWeakLearner())
    acc, err, auc = adaboost.boost(X, Y, test, test_target)
    result.append((acc, err, auc))
    H = adaboost.hypothesis(R)
    H_abs = np.abs(H)
    sorted_indices = H_abs.argsort().tolist()
    selected = sorted_indices[:increment_size]
    remained = sorted_indices[increment_size:]

    X = np.vstack((X, R[selected]))
    # Y = np.append(Y, adaboost.sign(H[selected]))
    Y = np.append(Y, RY[selected])
    R = R[remained]
    RY = RY[remained]
    percentage += increment
