Example #1
from random import shuffle

def train(features, steps=10):
    bestStumps = []
    votingWeights = []
    minError = float('inf')
    # each sample is a (feature_dict, label) pair; take the feature names from the first one
    featureList = list(features[0][0].keys())

    for i in range(steps):
        classifiers = []
        weights = [5 for n in features]  # uniform initial sample weights
        errors = []
        shuffle(featureList)  # visit the features in a random order each round
        for f in featureList:
            ds, matches = DecisionStump.train(features, f, weights, 15)
            AdaBoost.setWeights(weights, matches)
            errors.append(ds.getErrorRate())
            classifiers.append(ds)

        error = sum(errors)
        print("error =", error)
        if error < minError:
            minError = error
            bestStumps = list(classifiers)
            # a stump's vote grows the further its error sits below the worst one
            _max = max(errors)
            votingWeights = [1 - e / _max for e in errors]

    print("minError =", minError)
    return AdaBoost4({'featureList': featureList, 'classifiers': bestStumps, 'votingWeights': votingWeights})
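
Examples #1, #2 and #5 lean on a DecisionStump helper that this page does not show. The sketch below is a hypothetical reconstruction inferred from the call sites, assuming each sample is a (feature_dict, label) pair with labels in {-1, +1}; the real class may well differ.

class DecisionStump:
    def __init__(self, feature=None, threshold=None, polarity=1):
        self.feature = feature
        self.threshold = threshold
        self.polarity = polarity
        self.errorRate = 0.0

    @staticmethod
    def train(features, name, weights, steps=10):
        # Try every observed value of the feature as a threshold, in both
        # polarities, and keep the split with the lowest weighted error.
        # (The extra 'steps' argument seen in the originals is ignored here.)
        best, bestErr, bestMatches = None, float('inf'), []
        for t in sorted({x[name] for x, y in features}):
            for p in (1, -1):
                matches = [(p if x[name] >= t else -p) == y for x, y in features]
                err = sum(w for w, hit in zip(weights, matches) if not hit)
                if err < bestErr:
                    best, bestErr, bestMatches = DecisionStump(name, t, p), err, matches
        best.errorRate = bestErr / sum(weights)
        return best, bestMatches

    def getErrorRate(self):
        return self.errorRate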
Example #2
from math import log

def train(features, steps=10):
    classifiers = []
    weights = [1 for n in features]  # uniform initial sample weights
    errors = []
    featureList = list(features[0][0].keys())

    for i in range(steps):
        print("iteration: {0}".format(i))
        minError = float('inf')
        bestStump = {}  # a fresh dict each round, so earlier rounds are not overwritten
        for f in featureList:
            ds, matches = DecisionStump.train(features, f, weights)
            error = ds.getErrorRate()
            if error < minError:
                minError = error
                bestStump["stump"] = ds
                bestStump["matches"] = matches
        minError = minError / len(features)
        # vote weight: alpha = 0.5 * ln((1 - error) / error), clamped away from division by zero
        alpha = float(0.5 * log((1.0 - minError) / max(minError, 1e-16)))
        bestStump["alpha"] = alpha
        classifiers.append(bestStump)
        AdaBoost2.setWeights(weights, bestStump["matches"], alpha)
        errors.append(minError)
        print("error={0}".format(minError))
        if minError == 0:
            break

    votingWeights = [1 - e / sum(errors) for e in errors]
    return AdaBoost2({'featureList': featureList, 'classifiers': classifiers, 'votingWeights': votingWeights})
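
The AdaBoost2.setWeights call above is also not shown on this page. A minimal sketch of the standard AdaBoost reweighting it presumably performs, assuming matches is a per-sample hit/miss list:

import math

def setWeights(weights, matches, alpha):
    # Shrink the weight of correctly classified samples, grow the weight
    # of misclassified ones (standard AdaBoost update), then renormalize.
    for i, hit in enumerate(matches):
        weights[i] *= math.exp(-alpha if hit else alpha)
    total = sum(weights)
    for i in range(len(weights)):
        weights[i] /= total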
Example #3
import math
from random import randrange

import numpy as np


def adaboost_algo_random(x_train, y_train, testing_x, testing_y, max_iter):
    # Initialize the sample weights uniformly to 1/n
    w = np.ones(len(x_train)) / len(x_train)

    dec_classifiers = []

    for iter_number in range(max_iter):
        classifier = DecisionStump()

        # Pick a random feature, then a random threshold among its unique values
        feature = randrange(0, len(x_train[0]))
        f_values = x_train[:, feature]
        unique_feature = list(set(f_values))
        threshold_val = unique_feature[randrange(len(unique_feature))]

        stump_prediction = np.ones(np.shape(y_train))
        stump_prediction[f_values < threshold_val] = -1
        weighted_error = np.sum(w[y_train != stump_prediction])

        # A stump worse than chance is still useful with its polarity flipped
        if weighted_error > 0.5:
            p = -1
            weighted_error = 1 - weighted_error
        else:
            p = 1
        classifier.threshold = threshold_val
        classifier.feature = feature
        classifier.polarity = p
        # Vote weight: alpha = 0.5 * ln((1 - err) / err), with an epsilon against division by zero
        classifier.alpha = 0.5 * math.log(
            (1.0 - weighted_error) / (weighted_error + 1e-10))

        predictions = np.ones(y_train.shape)
        negative_idx = (classifier.polarity * x_train[:, classifier.feature] <
                        classifier.polarity * classifier.threshold)
        predictions[negative_idx] = -1

        # Update the sample weights: misclassified points gain weight,
        # then renormalize to a distribution
        w *= np.exp(-classifier.alpha * y_train * predictions)
        w /= np.sum(w)

        dec_classifiers.append(classifier)

        # Printing and verification after each step

        # prediction_y_train = predict(dec_classifiers, x_train)
        # prediction_y_test = predict(dec_classifiers, testing_x)

        # training_accuracy = evaluate_prediction_accuracy(y_train, prediction_y_train)
        # testing_accuracy = evaluate_prediction_accuracy(testing_y, prediction_y_test)

        # auc_val = roc_auc_score(testing_y, prediction_y_test)

        # print("Round number", iter_number, "Feature:", classifier.feature, "Threshold:", classifier.threshold,
        #       "Weighted error", weighted_error, "Training_error", 1 - training_accuracy, "Testing_error",
        #       1 - testing_accuracy,
        #       "AUC", auc_val)

    return dec_classifiers
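
The commented-out checks in Example #3 (and the live ones in Example #7) call a predict() helper that the listing omits. A plausible sketch, assuming a 2-D NumPy feature matrix and the classifier fields set above; this is the usual AdaBoost combiner, not necessarily the author's exact code:

import numpy as np

def predict(classifiers, x):
    # Sign of the alpha-weighted sum of all stump votes
    agg = np.zeros(len(x))
    for clf in classifiers:
        votes = np.ones(len(x))
        negative = (clf.polarity * x[:, clf.feature] <
                    clf.polarity * clf.threshold)
        votes[negative] = -1
        agg += clf.alpha * votes
    return np.sign(agg)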
Example #4
    def get_hypothesis(self, sampled_train_data):
        """
        :param pandas.DataFrame sampled_train_data:
        :return: the stump with the highest information gain
        :rtype: DecisionStump
        """
        self.stumps = []
        self.max_gain = -float("inf")  # negative infinity
        self.best_stump = None  # type: DecisionStump

        for name in self.stump_names:
            stump_temp = DecisionStump(name)
            stump_temp.train(sampled_train_data)
            self.stumps.append(stump_temp)

        for stump in self.stumps:
            gain_temp = stump.get_info_gain()
            if gain_temp > self.max_gain:
                self.max_gain = gain_temp
                self.best_stump = stump

        return self.best_stump
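
get_info_gain() is not shown either. For reference, a minimal information-gain computation of the kind it presumably performs, assuming binary labels and a binary split; the actual stump internals may differ:

import math

def entropy(pos, neg):
    # Shannon entropy of a (pos, neg) class count pair
    total = pos + neg
    h = 0.0
    for c in (pos, neg):
        if c:
            p = c / total
            h -= p * math.log2(p)
    return h

def info_gain(parent, left, right):
    # parent/left/right are (pos, neg) counts before and after the split
    n = sum(parent)
    weighted_children = (sum(left) / n) * entropy(*left) \
                        + (sum(right) / n) * entropy(*right)
    return entropy(*parent) - weighted_children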
Example #5
def train(features, steps=10):
    classifiers = []
    weights = [10 for n in features]  # uniform initial sample weights
    errors = []
    featureList = list(features[0][0].keys())

    for f in featureList:
        ds, matches = DecisionStump.train(features, f, weights, steps)
        AdaBoostAtCascade.setWeights(weights, matches)
        errors.append(ds.getErrorRate())
        classifiers.append(ds)

    # a stump's vote grows the further its error sits below the worst one
    _max = max(errors)
    votingWeights = [1 - e / _max for e in errors]
    return AdaBoostAtCascade({'featureList': featureList, 'classifiers': classifiers, 'votingWeights': votingWeights})
Example #6
def adaboost_algo_interval(training_x, training_y, testing_x, testing_y,
                           max_iter):
    # Initialize the sample weights uniformly to 1/n
    w = np.ones(len(training_x)) / len(training_x)

    dec_classifiers = []

    for iter_number in range(max_iter):

        classifier = DecisionStump()
        min_weighted_error = math.inf

        # Best decision stump
        for j in range(len(training_x[0])):

            f_values = training_x[:, j]
            unique_feature = set(f_values)
            unique_feature_linspace = np.linspace(min(unique_feature),
                                                  max(unique_feature),
                                                  num=4)

            for threshold in unique_feature_linspace:
                stump_prediction = np.ones((np.shape(training_y)))
                stump_prediction[f_values < threshold] = -1

                weighted_error = np.sum(w[training_y != stump_prediction])

                # A stump worse than chance is still useful with its polarity flipped
                if weighted_error > 0.5:
                    p = -1
                    weighted_error = 1 - weighted_error
                else:
                    p = 1

                if weighted_error < min_weighted_error:
                    min_weighted_error = weighted_error

                    classifier.threshold = threshold
                    classifier.feature = j
                    classifier.polarity = p
        classifier.alpha = 0.5 * math.log(
            (1.0 - min_weighted_error) / (min_weighted_error + 1e-10))

        predictions = np.ones(training_y.shape)
        negative_idx = (classifier.polarity * training_x[:, classifier.feature]
                        < classifier.polarity * classifier.threshold)
        predictions[negative_idx] = -1

        # Update the sample weights: misclassified points gain weight,
        # then renormalize to a distribution
        w *= np.exp(-classifier.alpha * training_y * predictions)
        w /= np.sum(w)

        dec_classifiers.append(classifier)

        # Printing and verification after each step

        # prediction_y_train = predict(dec_classifiers, training_x)
        # prediction_y_test = predict(dec_classifiers, testing_x)
        #
        # training_accuracy = evaluate_prediction_accuracy(training_y, prediction_y_train)
        # testing_accuracy = evaluate_prediction_accuracy(testing_y, prediction_y_test)
        #
        # auc_val = roc_auc_score(testing_y, prediction_y_test)
        #
        # print("Round number", iter_number, "Feature:", classifier.feature, "Threshold:", classifier.threshold,
        #       "Weighted error", min_weighted_error, "Training_error", 1 - training_accuracy, "Testing_error",
        #       1 - testing_accuracy,
        #       "AUC", auc_val)

    return dec_classifiers
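
The only difference from the exhaustive search in Example #7 is the threshold grid: four evenly spaced candidates over each feature's range instead of every distinct value, trading some precision for a much cheaper search. A tiny illustration (the feature values are made up):

import numpy as np

f_values = np.array([0.1, 0.4, 0.4, 2.0, 3.5])
print(np.linspace(f_values.min(), f_values.max(), num=4))
# -> [0.1        1.23333333 2.36666667 3.5       ]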
Example #7
def adaboost_algo(dataset, y_train, testing_x, testing_y, max_iter):
    # Initialize the sample weights uniformly to 1/n
    w = np.ones(len(dataset)) / len(dataset)

    dec_classifiers = []

    for iter_number in range(max_iter):
        classifier = DecisionStump()
        min_weighted_error = math.inf

        # Exhaustive search for the best decision stump; the last column of
        # dataset appears to hold the label, so it is skipped
        for j in range(len(dataset[0]) - 1):

            f_values = dataset[:, j]
            unique_feature = set(f_values)

            for threshold in unique_feature:
                stump_prediction = np.ones(np.shape(y_train))
                stump_prediction[f_values < threshold] = -1

                weighted_error = np.sum(w[y_train != stump_prediction])

                # A stump worse than chance is still useful with its polarity flipped
                if weighted_error > 0.5:
                    p = -1
                    weighted_error = 1 - weighted_error
                else:
                    p = 1

                if weighted_error < min_weighted_error:
                    min_weighted_error = weighted_error

                    classifier.threshold = threshold
                    classifier.feature = j
                    classifier.polarity = p
        classifier.alpha = 0.5 * math.log(
            (1.0 - min_weighted_error) / (min_weighted_error + 1e-10))

        predictions = np.ones(y_train.shape)
        negative_idx = (classifier.polarity * dataset[:, classifier.feature] <
                        classifier.polarity * classifier.threshold)
        predictions[negative_idx] = -1

        # Update the sample weights: misclassified points gain weight,
        # then renormalize to a distribution
        w *= np.exp(-classifier.alpha * y_train * predictions)
        w /= np.sum(w)

        dec_classifiers.append(classifier)

        # Diagnostics after each round; the slice keeps only the feature
        # columns (the first 57 here), matching the loop bound above
        prediction_y_train = predict(dec_classifiers, dataset[:, 0:57])
        prediction_y_test = predict(dec_classifiers, testing_x)

        training_accuracy = evaluate_prediction_accuracy(
            y_train, prediction_y_train)
        testing_accuracy = evaluate_prediction_accuracy(
            testing_y, prediction_y_test)

        auc_val = roc_auc_score(testing_y, prediction_y_test)

        print("Round number", iter_number, "Feature:", classifier.feature,
              "Threshold:", classifier.threshold, "Weighted error",
              min_weighted_error, "Training_error", 1 - training_accuracy,
              "Testing_error", 1 - testing_accuracy, "AUC", auc_val)

    return dec_classifiers
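
evaluate_prediction_accuracy() is likewise undefined in the listing. A minimal sketch, assuming label vectors of equal length:

import numpy as np

def evaluate_prediction_accuracy(y_true, y_pred):
    # Fraction of predictions that match the true labels
    return float(np.mean(np.asarray(y_true) == np.asarray(y_pred)))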