예제 #1
0
def getModel():
    """Train an Adaboost classifier on a shuffled sample of the loaded features.

    The feature matrix comes from ``loadFeatures()``. Its rows are shuffled in
    place and the first 5000 rows are used for training: every column except
    the last is a feature, and the last column is the label (reshaped to a
    column vector).

    Returns:
        The ``Adaboost`` instance after ``fit`` has been called.
    """
    # Local import so the block does not depend on a file-level numpy import.
    import numpy as np

    featureMat = loadFeatures()
    print("features loading over...")
    # random.shuffle swaps rows of a 2-D array through views, which silently
    # duplicates some rows and loses others. numpy's shuffle permutes the
    # first axis in place without corrupting the data.
    # NOTE(review): this draws from numpy's global RNG instead of the `random`
    # module's, so seed with np.random.seed if reproducibility is needed.
    np.random.shuffle(featureMat)

    train_data = featureMat[:5000, :-1]
    train_label = featureMat[:5000, -1].reshape(-1, 1)

    clf = Adaboost(n_estimators=200, debug=True)
    clf.fit(train_data, train_label)

    return clf
예제 #2
0
파일: classify.py 프로젝트: ebridge2/cs_475
def train(instances, args):
    """Build the predictor selected by ``args.algorithm`` and train it.

    Args:
        instances: Training data passed to the predictor's ``train`` method.
        args: Options object. ``args.algorithm`` selects the model; only the
            hyper-parameter attributes of the selected model are read.

    Returns:
        The predictor after ``predictor.train(instances)`` has been called.

    Raises:
        ValueError: If ``args.algorithm`` is not one of the supported names.
    """
    # Factories are lazy so that only the chosen model is constructed and only
    # its own attributes are read from ``args``.
    factories = {
        "averaged_perceptron": lambda: Weighted_Perceptron(
            args.online_learning_rate, args.online_training_iterations),
        "perceptron": lambda: Perceptron(
            args.online_learning_rate, args.online_training_iterations),
        "mc_perceptron": lambda: MC_Perceptron(
            args.online_training_iterations),
        "margin_perceptron": lambda: Margin_Perceptron(
            args.online_learning_rate, args.online_training_iterations),
        "pegasos": lambda: Pegasos(
            args.pegasos_lambda, args.online_training_iterations),
        "knn": lambda: Standard_knn(args.knn),
        "distance_knn": lambda: Distance_knn(args.knn),
        "adaboost": lambda: Adaboost(args.num_boosting_iterations),
        "lambda_means": lambda: Lambda_Means(
            args.cluster_lambda, args.clustering_training_iterations),
        "nb_clustering": lambda: Naive_Bayes(
            args.num_clusters, args.clustering_training_iterations),
    }

    factory = factories.get(args.algorithm)
    if factory is None:
        # The original referenced an undefined name here, so an unknown
        # algorithm surfaced as NameError rather than this ValueError.
        options = ", ".join("'{}'".format(name) for name in sorted(factories))
        raise ValueError(
            "You did not pass a relevant algorithm name. "
            "Options are {}. You passed: {}".format(options, args.algorithm))

    predictor = factory()
    # train it on the data
    predictor.train(instances)
    return predictor
예제 #3
0
def handle_predict(argv):
    """Classify the examples in ``argv[4]`` with the hypothesis stored in ``argv[3]``.

    The model file is read as two lines: a model tag, then the hypothesis
    written as a Python literal. This variant always classifies with
    ``Adaboost``, so the tag line is read only to reach the hypothesis line.

    Args:
        argv: Argument list; ``argv[3]`` is the model file path and
            ``argv[4]`` is the file of examples to classify.

    Returns:
        Whatever ``tree.classify(examples, hypothesis)`` returns.
    """
    # The `with` block closes the file; no explicit close() is needed.
    with open(argv[3], "r") as f:
        f.readline()  # skip the model tag line
        raw_hypothesis = f.readline()
    # NOTE(review): presumably ast.literal_eval, which parses literals only and
    # does not execute code -- confirm the import.
    hypothesis = literal_eval(raw_hypothesis)

    tree = Adaboost()
    # Examples whose classification is 'en' form the positive class.
    tree.define_positive_class(lambda x: x.classification == 'en')
    tree.define_classes(processing.classes)
    tree.define_attributes(processing.attr_definitions)

    examples = process_file(argv[4], training=False)
    examples = tree.create_examples(examples)
    return tree.classify(examples, hypothesis)
def train_adaboost(train, test, model_parameters=None):
    """Train one Adaboost model per pair of entries in ``train`` and report on ``test``.

    For every pair ``(i, j)`` with ``j < i`` a model is built from
    ``train[i]`` and ``train[j]``. Its two prediction outputs are accumulated
    into the rows for ``i`` and ``j``. Each test sample is then assigned the
    row index holding the column maximum, and the result is passed to
    ``print_report``.

    Args:
        train: Sequence of per-class training sets, indexed ``0..len(train)-1``.
        test: 2-D array; all columns but the last are passed to the model and
            the last column is reported as the reference labels.
        model_parameters: Parameters forwarded to ``Adaboost``. Defaults to
            ``[1, "SAMME", 200]``.

    Returns:
        None. The outcome is reported through ``print_report``.
    """
    # A fresh list per call: a list literal as the default would be shared
    # between calls and could be mutated by a previous run.
    if model_parameters is None:
        model_parameters = [1, "SAMME", 200]

    num_classifier = len(train)
    num_samples = test.shape[0]
    predicted_label = numpy.zeros((num_classifier, num_samples))
    predicted_max_label = numpy.zeros((num_samples))

    for i in range(num_classifier):
        for j in range(i):
            adaboost = Adaboost(train[i], train[j], model_parameters)
            adaboost.adaboost_train()
            label_1, label_2 = adaboost.adaboost_predict(test[:, 0:-1])
            predicted_label[i, :] = predicted_label[i, :] + label_1
            predicted_label[j, :] = predicted_label[j, :] + label_2

    # True wherever a row holds its column's maximum.
    is_column_max = (predicted_label == numpy.max(predicted_label, axis=0))
    for sample in range(is_column_max.shape[1]):
        for row in range(is_column_max.shape[0]):
            # Later rows overwrite earlier ones, so ties go to the highest index.
            if is_column_max[row][sample]:
                predicted_max_label[sample] = row

    print_report('Adaboost', test[:, -1], predicted_max_label)
def train(instances):
    """Build and train the model named by the module-level ``args.algorithm``.

    ``'knn'`` selects ``Knn`` and ``'adaboost'`` selects ``Adaboost``. Both are
    configured from module-level values and trained on ``instances``.

    Returns:
        The trained model, or ``None`` for any other algorithm name.
    """
    algorithm = args.algorithm
    if algorithm == 'knn':
        model = Knn(max_size, args.knn)
    elif algorithm == 'adaboost':
        model = Adaboost(max_size, max_max_index, args.num_boosting_iterations)
    else:
        # Unknown algorithm: nothing to train.
        return None

    model.train(instances)
    return model
def run_adaboost(X, y, data1, data2):
    """Run 50 train/test rounds of Adaboost and print the mean test accuracy.

    Each round splits ``X``/``y`` in half, turns the ``data1`` and ``data2``
    columns of every row into a ``Point``, trains a fresh ``Adaboost`` and
    measures the fraction of test predictions that equal the true label.

    Args:
        X: Feature table supporting ``iterrows()``.
        y: Labels supporting ``to_numpy()`` after the split.
        data1: Column key used as the first ``Point`` argument.
        data2: Column key used as the second ``Point`` argument.

    Returns:
        None. The mean accuracy is printed.
    """
    # runs
    n = 50
    total_avg = 0
    for _ in range(n):
        # split the data to train and test
        # NOTE(review): random_state is fixed, so every round sees the same
        # split. Averaging only smooths randomness inside Adaboost, if any.
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.5,
                                                            random_state=5)
        train_y = y_train.to_numpy()
        test_y = y_test.to_numpy()
        train_x = [Point(row[data1], row[data2])
                   for _, row in X_train.iterrows()]
        test_x = [Point(row[data1], row[data2])
                  for _, row in X_test.iterrows()]

        # call adaboost
        ad = Adaboost()
        ad.train(train_x, train_y)
        predicts = ad.predict(test_x, test_y)

        # Count matches without shadowing the builtin `sum`.
        hits = sum(1 for predicted, actual in zip(predicts, test_y)
                   if predicted == actual)
        total_avg += hits / len(predicts)

    print("אחוז הצלחה", total_avg / n)
예제 #7
0
def handle_train(argv):
    """Train a model on ``argv[2]`` and write it to ``argv[3]``.

    Args:
        argv: Argument list; ``argv[2]`` is the training file, ``argv[3]`` the
            output path and ``argv[4]`` the model tag. ``"dt"`` selects
            ``DecisionTree``; any other tag selects ``Adaboost``.

    The output file holds the tag on the first line followed by
    ``str(tree.tree)``. The model is also printed via ``tree.print()``.
    """
    examples = process_file(argv[2], training=True)

    tree = DecisionTree() if argv[4] == "dt" else Adaboost()
    # Examples whose classification is 'en' form the positive class.
    tree.define_positive_class(lambda x: x.classification == 'en')
    tree.define_classes(processing.classes)
    tree.define_attributes(processing.attr_definitions)
    tree.load_examples(examples)
    tree.generate(tree.examples)

    # The `with` block closes the file; no explicit close() is needed.
    with open(argv[3], "w") as f:
        f.write(argv[4] + "\n")
        f.write(str(tree.tree))
    tree.print()
예제 #8
0
def train(examples, hypothesis_out, algo):
    """
    Train the model type chosen by the user.
    :param examples: the training data
    :param hypothesis_out: the file in which the model is stored
    :param algo: the chosen algorithm; compared against DECISION_TREE and
                 ADABOOST (the message below names them "dt" and "ada")
    :return: None; exits the process when ``algo`` matches neither constant
    """
    if algo == DECISION_TREE:
        model_class = DecisionTree
    elif algo == ADABOOST:
        model_class = Adaboost
    else:
        # Unknown choice: tell the user the valid options and stop.
        print("Please use \"dt\" as decision tree model, \"ada\" as Adaboost model")
        sys.exit()

    # Both model types share the same constructor keywords.
    model = model_class(train_file=examples, out_file=hypothesis_out)
    model.train()
예제 #9
0
def handle_predict(argv):
    """Classify the examples in ``argv[3]`` with the model stored in ``argv[2]``.

    The model file is read as two lines: a model tag, then the hypothesis
    written as a Python literal. The tag ``"dt"`` selects ``DecisionTree``;
    any other tag selects ``Adaboost``.

    Args:
        argv: Argument list; ``argv[2]`` is the model file path and
            ``argv[3]`` is the file of examples to classify.

    Returns:
        None. Each classification is printed on its own line.
    """
    # The `with` block closes the file; no explicit close() is needed.
    with open(argv[2], "r") as f:
        model = f.readline().strip('\n')
        raw_hypothesis = f.readline()
    hypothesis = literal_eval(raw_hypothesis)

    tree = DecisionTree() if model == "dt" else Adaboost()
    # Examples whose classification is 'en' form the positive class.
    tree.define_positive_class(lambda x: x.classification == 'en')
    tree.define_classes(processing.classes)
    tree.define_attributes(processing.attr_definitions)

    examples = process_file(argv[3], training=False)
    examples = tree.create_examples(examples)
    for classification in tree.classify(examples, hypothesis):
        print(classification)
예제 #10
0
파일: classify.py 프로젝트: ebridge2/cs_475
def train(instances, algorithm, rate, iterations, lambd, knn, boost_iter):
    """Build the predictor named by ``algorithm`` and train it on ``instances``.

    Args:
        instances: Training data passed to the predictor's ``train`` method.
        algorithm: Name of the model to build.
        rate: Learning rate for the perceptron variants.
        iterations: Training iterations for the perceptron variants and Pegasos.
        lambd: First constructor argument for ``Pegasos``.
        knn: Constructor argument for ``Standard_knn`` and ``Distance_knn``.
        boost_iter: Constructor argument for ``Adaboost``.

    Returns:
        The predictor after ``predictor.train(instances)`` has been called.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names.
    """
    # Lazy factories: only the requested model is constructed.
    factories = {
        "averaged_perceptron": lambda: Weighted_Perceptron(rate, iterations),
        "perceptron": lambda: Perceptron(rate, iterations),
        "margin_perceptron": lambda: Margin_Perceptron(rate, iterations),
        "pegasos": lambda: Pegasos(lambd, iterations),
        "knn": lambda: Standard_knn(knn),
        "distance_knn": lambda: Distance_knn(knn),
        "adaboost": lambda: Adaboost(boost_iter),
    }

    factory = factories.get(algorithm)
    if factory is None:
        # List every supported name; the original message ran its sentences
        # together and named only four of the seven options.
        options = ", ".join("'{}'".format(name) for name in sorted(factories))
        raise ValueError(
            "You did not pass a relevant algorithm name. "
            "Options are {}. You passed: {}".format(options, algorithm))

    predictor = factory()
    # train it on the data
    predictor.train(instances)
    return predictor  # return it back
예제 #11
0
def main():
    """Grow an Adaboost model one weak learner at a time on the banknote data.

    Loads features and string labels from the ``banknote_auth`` CSV files,
    maps the labels to {-1, 1}, splits the samples 50/50 with a random
    permutation and adds 69 weak learners one by one. The training and test
    error are recorded after each addition. The first and last errors are
    printed and everything is passed to ``plot_results``.
    """
    # Read in the dataset
    X = np.loadtxt('banknote_auth/data_banknote_auth.csv', delimiter=',')
    Y = np.loadtxt('banknote_auth/labels_banknote_auth.csv', dtype=str)

    # Map labels to {-1,1}
    # Sorting fixes which label becomes +1. Iterating a bare set of strings
    # follows hash order, which can change from one interpreter run to the next.
    labels = sorted(set(Y))
    Y = np.array([1 if y == labels[0] else -1 for y in Y])

    # Split the dataset
    training_ratio = 0.5  # Fraction of the data used for training
    n = len(Y)
    perm = np.random.permutation(n)
    train_data_size = int(n * training_ratio)
    print(n)
    print(train_data_size)
    train_idx, test_idx = perm[:train_data_size], perm[train_data_size:]
    train_data = X[train_idx, :]
    train_label = Y[train_idx]
    test_data = X[test_idx, :]
    test_label = Y[test_idx]

    # Maximum number of weak learners to be used in Adaboost
    max_num_weak_learners = 69

    # Train and test error after each added learner
    train_error = []
    test_error = []

    # Training Adaboost with weak learners
    model = Adaboost()
    for m in range(1, max_num_weak_learners + 1):
        print("Training Adaboost with weak learners %d" % m)
        model.add_learner(train_data, train_label)
        train_error.append(model.prediction_error(train_data, train_label))
        test_error.append(model.prediction_error(test_data, test_label))

    print("Initial Training Error=%.4f  Testing Error= %.4f " % (train_error[0], test_error[0]))
    print("Final Training Error=%.4f  Testing Error= %.4f " % (train_error[-1], test_error[-1]))
    plot_results(X, Y, train_idx, test_idx, train_error, test_error)
예제 #12
0
def handle_train(argv, size=None, depth=None):
    """Train an Adaboost model on ``argv[2]`` and write it to ``argv[3]``.

    Args:
        argv: Argument list; ``argv[2]`` is the training file and ``argv[3]``
            the output path.
        size: Accepted but not used by this function.
        depth: Accepted but not used by this function.

    The output file holds the tag ``ada`` on the first line followed by
    ``str(tree.tree)``. The model is also printed via ``tree.print()``.
    """
    examples = process_file(argv[2], training=True)

    tree = Adaboost()
    # Examples whose classification is 'en' form the positive class.
    tree.define_positive_class(lambda x: x.classification == 'en')
    tree.define_classes(processing.classes)
    tree.define_attributes(processing.attr_definitions)
    tree.load_examples(examples)
    tree.generate(tree.examples)

    # The `with` block closes the file; no explicit close() is needed.
    with open(argv[3], "w") as f:
        f.write("ada" + "\n")
        f.write(str(tree.tree))
    tree.print()
예제 #13
0
    # Fragment of a larger routine whose header is not visible here. It
    # dispatches on the (model, phase) pair and prints a timestamp before and
    # after. `model`, `phase`, `image_list` and `model_file` come from the
    # enclosing scope. Written with Python 2 print statements.
    print 'Start time', time()

    # 'best' is treated as an alias for the neural-net model.
    if model == 'best':
        model = 'nnet'

    # K-Nearest neighbors
    # train: build the model from image_list and hand it to serialize_to_file;
    # test: read it back with deserialize_from_file and run knn.test.
    if model == 'nearest' and phase == 'train':
        model = knn.train(image_list)
        serialize_to_file(model, model_file)
    elif model == 'nearest' and phase == 'test':
        model = deserialize_from_file(model_file)
        knn.test(image_list, model)

    # ADA boost
    # Both phases construct Adaboost from image_list; the test phase also
    # passes image_list and the stored params to adaboost_test.
    elif model == "adaboost" and phase == "train":
        params = Adaboost(image_list).adaboost()
        serialize_to_file(params, model_file)
    elif model == "adaboost" and phase == "test":
        params = deserialize_from_file(model_file)
        Adaboost(image_list).adaboost_test(image_list, params)

    # Neural net
    elif model == 'nnet' and phase == 'train':
        net = neural_net.train(image_list)
        serialize_to_file(net, model_file)
    elif model == 'nnet' and phase == 'test':
        net = deserialize_from_file(model_file)
        neural_net.test(net, image_list)

    # Any other (model, phase) combination falls through without doing work.
    print 'End time', time()
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split

from adaboost import Adaboost


def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Args:
        y_true: Reference labels (array or plain sequence).
        y_pred: Predicted labels, the same length as ``y_true``.

    Returns:
        The number of element-wise matches divided by ``len(y_true)``.
    """
    # Converting to arrays makes `==` element-wise for plain lists too.
    # Without it, two lists compare as whole objects and give a wrong score.
    matches = np.asarray(y_true) == np.asarray(y_pred)
    # float() keeps this a true division on interpreters where `/` truncates
    # integers.
    return np.sum(matches) / float(len(y_true))


# Load the breast-cancer dataset bundled with scikit-learn.
data = datasets.load_breast_cancer()
X, y = data.data, data.target

# Relabel class 0 as -1 in place, so the labels handed to Adaboost are -1
# wherever the dataset had 0.
np.copyto(y, -1, where=(y == 0))

# Hold out 20% of the samples for testing; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=5)

# Adaboost classification with 5 weak classifiers
clf = Adaboost(n_clf=5)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Report the share of test samples predicted correctly.
acc = accuracy(y_test, y_pred)
print("Accuracy:", acc)
예제 #15
0
from adaboost import Adaboost
from data_manager import DataManager
from classifier_weak import ClassifierWeak
import numpy as np

# Module-level objects used by this script; `adb` is read by print_results.
# NOTE(review): the meaning of the two 200s is set by Adaboost's constructor,
# which is not visible here.
adb = Adaboost(200, 200)
dm = DataManager()
# presumably a percentage used when splitting the data -- TODO confirm
percentage = 60


def count_correct_predictions(predictions, tags):
    """Count the positions where a prediction equals its tag.

    The two sequences are walked in lockstep; extra items in the longer one
    are ignored.
    """
    return sum(1 for predicted, expected in zip(predictions, tags)
               if predicted == expected)


def print_results(prediction_training, prediction_test, training_tags,
                  test_tags):
    """Compute hit percentages for both sets and print the classifier count.

    The training and test hit percentages are calculated from the prediction
    and tag sequences. The visible code prints only the module-level
    ``adb.T_CLASSIFIERS`` value.
    """
    n_training, n_test = len(training_tags), len(test_tags)
    hits_training = count_correct_predictions(prediction_training,
                                              training_tags)
    hits_test = count_correct_predictions(prediction_test, test_tags)

    # NOTE(review): these percentages are computed but never reported here;
    # the function may have been cut short.
    pct_training = hits_training * 100 / n_training
    pct_test = hits_test * 100 / n_test

    print('\nNumber of weak classifiers per strong: ' + str(adb.T_CLASSIFIERS))
예제 #16
0
# Script fragment (Python 2 print statements): reduce X with PCA, split it,
# build one instance of each model and start training with Adaboost.
# `X` and `y` are defined before this fragment.
pca = PCA()
X = pca.transform(X, n_components=5) # Reduce to 5 dimensions


# ..........................
#  TRAIN / TEST SPLIT
# ..........................
# Half of the samples are held out for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescale label for Adaboost to {-1, 1}
# 2*y - 1 sends 0 to -1 and 1 to 1; assumes the labels are 0/1 -- TODO confirm
rescaled_y_train = 2*y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2*y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
# One instance per model, each with the hyper-parameters given inline.
adaboost = Adaboost(n_clf = 8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20)
perceptron = Perceptron()
decision_tree = DecisionTree()
random_forest = RandomForest(n_estimators=150)
support_vector_machine = SupportVectorMachine(C=1, kernel=rbf_kernel)

# ........
#  TRAIN
# ........
print "Training:"
print "\tAdaboost"
# Adaboost is fitted on the rescaled {-1, 1} labels, not the raw ones.
adaboost.fit(X_train, rescaled_y_train)
예제 #17
0
# ..........................
# Script fragment (Python 2 print statements): reduce X with PCA, split it,
# build one instance of each model and start training with Adaboost.
# `X` and `y` are defined before this fragment.
pca = PCA()
X = pca.transform(X, n_components=5)  # Reduce to 5 dimensions

# ..........................
#  TRAIN / TEST SPLIT
# ..........................
# Half of the samples are held out for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescale label for Adaboost to {-1, 1}
# 2*y - 1 sends 0 to -1 and 1 to 1; assumes the labels are 0/1 -- TODO confirm
rescaled_y_train = 2 * y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2 * y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
# One instance per model, each with the hyper-parameters given inline.
adaboost = Adaboost(n_clf=8)
naive_bayes = NaiveBayes()
knn = KNN(k=4)
logistic_regression = LogisticRegression()
mlp = MultilayerPerceptron(n_hidden=20)
perceptron = Perceptron()
decision_tree = DecisionTree()
random_forest = RandomForest(n_estimators=150)
support_vector_machine = SupportVectorMachine(C=1, kernel=rbf_kernel)

# ........
#  TRAIN
# ........
print "Training:"
print "\tAdaboost"
# Adaboost is fitted on the rescaled {-1, 1} labels, not the raw ones.
adaboost.fit(X_train, rescaled_y_train)
예제 #18
0
def train(instances, algorithm, high_idx, learn_rate, iterate, peg_lambda,
          k_val, T, clus_lambda, K, clus_iter):
    """Construct the classifier named by ``algorithm`` and train it ``iterate`` times.

    Every classifier is built from ``instances`` plus its own settings:
    ``learn_rate`` for the perceptron variants, ``peg_lambda`` for Pegasos,
    ``k_val`` for the two KNN variants, ``T`` for Adaboost, ``clus_lambda``
    and ``clus_iter`` for Lambda_Means2, and ``K`` for Naive_Bayes. Its
    ``train(instances)`` method is then called once per iteration.

    Returns:
        The trained classifier, or ``None`` when ``algorithm`` is not a
        recognised name.
    """
    # Constructors are wrapped in lambdas so only the selected one runs.
    builders = {
        "perceptron":
            lambda: Perceptron(instances, high_idx, learn_rate),
        "averaged_perceptron":
            lambda: AveragePerceptron(instances, high_idx, learn_rate),
        "pegasos":
            lambda: Pegasos(instances, high_idx, peg_lambda),
        "margin_perceptron":
            lambda: PerceptronMargin(instances, high_idx, learn_rate, iterate),
        "knn":
            lambda: KNN(instances, k_val, high_idx),
        "distance_knn":
            lambda: Distance_KNN(instances, k_val, high_idx),
        "adaboost":
            lambda: Adaboost(instances, T, high_idx),
        "lambda_means":
            lambda: Lambda_Means2(instances, high_idx, clus_lambda, clus_iter),
        "nb_clustering":
            lambda: Naive_Bayes(instances, high_idx, K),
        "mc_perceptron":
            lambda: MC_Perceptron(instances, high_idx),
    }

    build = builders.get(algorithm)
    if build is None:
        return None

    classifier = build()
    # iterate the training
    for _ in range(iterate):
        classifier.train(instances)
    return classifier