예제 #1
0
def plot_prediction(
        learner=None,
        data=None,
        minx=0,
        maxx=5,
        step_size=0.01,  # for plotting
        label="function"):
    """Train a learner on single-input data and plot the data with the fit.

    learner is the learner to train; when None, a Linear_learner with
        squashed=False is built on data.
    data is the dataset; when None, 'data/simp_regr.csv' is loaded with no
        test split.
    minx, maxx, step_size define the x grid on which the learned predictor
        is drawn.
    label is the legend label of the learned curve.

    The learner is trained in three stages, each with a ten times smaller
    learning rate and ten times more iterations than the one before.
    """
    plt.ion()
    plt.xlabel("x")
    plt.ylabel("y")
    if data is None:
        data = Data_from_file('data/simp_regr.csv',
                              prob_test=0,
                              boolean_features=False,
                              target_index=-1)
    if learner is None:
        learner = Linear_learner(data, squashed=False)

    # (learning rate, number of iterations) for each training stage
    schedule = ((0.001, 100), (0.0001, 1000), (0.00001, 10000))
    for rate, iterations in schedule:
        learner.learning_rate = rate
        learner.learn(iterations)

    description = learner.predictor_string()
    train_error = data.evaluate_dataset(data.train, learner.predictor,
                                        "sum-of-squares")
    learner.display(1, "function learned is", description, "error=",
                    train_error)

    # training points: first column against last column
    data_x = [e[0] for e in data.train]
    data_y = [e[-1] for e in data.train]
    plt.plot(data_x, data_y, "bo", label="data")

    # learned function sampled on a regular grid
    grid = list(arange(minx, maxx, step_size))
    fitted = [learner.predictor([x]) for x in grid]
    plt.plot(grid, fitted, label=label)
    plt.legend()
    plt.draw()
예제 #2
0
def plot_polynomials(data=None,
                     learner_class=Linear_learner,
                     max_degree=5,
                     minx=0,
                     maxx=5,
                     num_iter=100000,
                     learning_rate=0.0001,
                     step_size=0.01,   # for plotting
                     ):
    """Fit and plot one learner per degree 0 .. max_degree-1.

    data is the dataset; when None, 'data/simp_regr.csv' is loaded with no
        test split.
    learner_class is called as learner_class(dataset, squashed=False).
    max_degree is an exclusive upper bound: degrees range(max_degree) are
        plotted.
    minx, maxx, step_size define the x grid used to draw each curve.
    num_iter and learning_rate are the training settings for every degree.

    For each degree the data is augmented with power_feat(1..degree)
    features (original features excluded), a learner is trained on it, its
    description and error are displayed, and its curve is added to the plot.
    """
    plt.ion()
    plt.xlabel("x")
    plt.ylabel("y")
    if data is None:
        data = Data_from_file('data/simp_regr.csv', prob_test=0,
                              boolean_features=False, target_index=-1)
    train_x = [e[0] for e in data.train]
    train_y = [e[-1] for e in data.train]
    plt.plot(train_x, train_y, "ko", label="data")

    x_values = list(arange(minx, maxx, step_size))
    line_styles = ['-', '--', '-.', ':']
    colors = ['0.5', 'k', 'k', 'k', 'k']
    for degree in range(max_degree):
        power_features = [power_feat(n) for n in range(1, degree + 1)]
        data_aug = Data_set_augmented(data, power_features,
                                      include_orig=False)
        learner = learner_class(data_aug, squashed=False)
        learner.learning_rate = learning_rate
        learner.learn(num_iter)

        description = learner.predictor_string()
        # NOTE(review): the error is evaluated on the original `data`,
        # not on `data_aug` -- confirm this is intended.
        error = data.evaluate_dataset(data.train, learner.predictor,
                                      "sum-of-squares")
        learner.display(1, "For degree", degree,
                        "function learned is", description,
                        "error=", error)

        # cycle through the available styles and colours
        style = line_styles[degree % len(line_styles)]
        shade = colors[degree % len(colors)]
        curve = [learner.predictor([x]) for x in x_values]
        plt.plot(x_values, curve, linestyle=style, color=shade,
                 label="degree=" + str(degree))
        # refresh after every degree so the plot builds up incrementally
        plt.legend(loc='upper left')
        plt.draw()
예제 #3
0
def plot_steps(learner=None,
               data=None,
               criterion="sum-of-squares",
               step=1,
               num_steps=1000,
               log_scale=True,
               label=""):
    """
    plots the training and test error for a learner.
    learner is the learner to train; when None a Linear_learner on data is used
    data is the dataset; when None 'data/holiday.csv' is loaded (19 training examples)
    criterion gives the evaluation criterion plotted on the y-axis
    step specifies how many steps are run for each point on the plot
    num_steps is the upper bound on the step number; points are plotted at
        1, 1+step, 1+2*step, ... up to num_steps
    log_scale selects a logarithmic (True) or linear (False) x-axis
    label is accepted but not used by this function

    Errors are recorded before each batch of learning steps.
    Blocks on plt.show(block=True) until the plot window is closed.
    """
    plt.ion()
    plt.xlabel("step")
    plt.ylabel("Average " + criterion + " error")
    plt.xscale('log' if log_scale else 'linear')
    if data is None:
        data = Data_from_file('data/holiday.csv',
                              num_train=19,
                              target_index=-1)
        # alternative datasets:
        #data = Data_from_file('data/SPECT.csv', target_index=0)
        # data = Data_from_file('data/mail_reading.csv', target_index=-1)
        # data = Data_from_file('data/carbool.csv', target_index=-1)
    random.seed(None)  # reset seed
    if learner is None:
        learner = Linear_learner(data)

    plot_points = range(1, num_steps + 1, step)
    train_errors = []
    test_errors = []
    for _ in plot_points:
        current_test = data.evaluate_dataset(data.test, learner.predictor,
                                             criterion)
        test_errors.append(current_test)
        current_train = data.evaluate_dataset(data.train, learner.predictor,
                                              criterion)
        train_errors.append(current_train)
        learner.display(2, "Train error:", current_train, "Test error:",
                        current_test)
        learner.learn(num_iter=step)

    plt.plot(plot_points, train_errors, ls='-', c='k',
             label="training errors")
    plt.plot(plot_points, test_errors, ls='--', c='k',
             label="test errors")
    plt.legend()
    plt.pause(0.001)
    plt.show(block=True)
    learner.display(1, "Train error:", train_errors[-1], "Test error:",
                    test_errors[-1])
예제 #4
0
def test(**args):
    """Train a Linear_learner on SPECT.csv and print its test errors.

    args are passed as keyword arguments to Linear_learner.
    Prints the learned function, then the test-set error for every
    criterion in Data_set.evaluation_criteria.
    """
    data = Data_from_file('data/SPECT.csv', target_index=0)
    # alternative datasets:
    # data = Data_from_file('data/mail_reading.csv', target_index=-1)
    # data = Data_from_file('data/carbool.csv', target_index=-1)
    learner = Linear_learner(data, **args)
    learner.learn()
    print("function learned is", learner.predictor_string())
    for criterion in Data_set.evaluation_criteria:
        print("    Average", criterion, "error is",
              data.evaluate_dataset(data.test, learner.predictor, criterion))
예제 #5
0
def testMain():
    """Build a small neural network on mail_reading.csv, plot and time it.

    The network is: complete linear layer with 3 outputs, sigmoid,
    complete linear layer with 1 output, sigmoid. Training is driven by
    plot_steps; afterwards each training example is printed with the
    network's prediction, followed by the elapsed time.
    """
    data = Data_from_file('data/mail_reading.csv', target_index=-1)
    # alternative datasets:
    #data = Data_from_file('data/mail_reading_consis.csv', target_index=-1)
    #data = Data_from_file('data/SPECT.csv',  prob_test=0.5, target_index=0)
    #data = Data_from_file('data/holiday.csv', target_index=-1) #, num_train=19)
    net = NN(data)
    # Each layer is constructed only after the previous one has been added.
    net.add_layer(Linear_complete_layer(net, 3))
    net.add_layer(Sigmoid_layer(net))  # comment this or the next
    # net.add_layer(ReLU_layer(net))
    net.add_layer(Linear_complete_layer(net, 1))
    net.add_layer(Sigmoid_layer(net))
    net.learning_rate = 0.1
    #net.learn(100)

    from learnLinear import plot_steps
    import time
    started = time.perf_counter()
    plot_steps(learner=net, data=data, num_steps=10000)
    for example in data.train:
        print(example, net.predictor(example))
    # the timed span covers both training/plotting and the printing above
    elapsed = time.perf_counter() - started
    print("Time:", elapsed)
예제 #6
0
            train_errors.append( sum(self.distance(self.class_of_eg(eg),eg)
                                         for eg in self.dataset.train)
                                 /len(self.dataset.train))
            if self.dataset.test:
                test_errors.append( sum(self.distance(self.class_of_eg(eg),eg)
                                            for eg in self.dataset.test)
                                     /len(self.dataset.test))
        plt.plot(range(1,maxstep+1),train_errors,
                 label=str(self.num_classes)+" classes. Training set")
        if self.dataset.test:
            plt.plot(range(1,maxstep+1),test_errors,
                     label=str(self.num_classes)+" classes. Test set")
        plt.legend()
        plt.draw()

# Demo: run k-means with 2 classes for a few iterations and show the classes.
# Alternative datasets are kept as Python comments; the original lines used
# '%' as the comment marker, which is not valid Python.
# data = Data_from_file('data/emdata1.csv', num_train=10, target_index=2000)  # trivial example
data = Data_from_file('data/emdata2.csv', num_train=10, target_index=2000)
# data = Data_from_file('data/emdata0.csv', num_train=14, target_index=2000)  # example from textbook
kml = K_means_learner(data,2)
num_iter=4
print("Class assignment after",num_iter,"iterations:")
kml.learn(num_iter)
kml.show_classes()

# Plot the error
# km2=K_means_learner(data,2); km2.plot_error(20)   # 2 classes
# km3=K_means_learner(data,3); km3.plot_error(20)   # 3 classes
# km13=K_means_learner(data,13); km13.plot_error(20) # 13 classes

# data = Data_from_file('data/carbool.csv', target_index=2000,boolean_features=True)
# kml = K_means_learner(data,3)
# kml.learn(20); kml.show_classes()
예제 #7
0
# Testing

from learnDT import DT_learner
from learnProblem import Data_set, Data_from_file


def sp_DT_learner(min_prop=0.9):
    """Return a factory that builds a DT_learner for a given dataset.

    min_prop is the proportion of the training set size that is passed to
    DT_learner as min_number_examples.
    The returned function takes a dataset and returns the DT_learner.
    """
    def make_learner(dataset):
        # threshold scales with the size of this dataset's training set
        return DT_learner(dataset,
                          min_number_examples=len(dataset.train) * min_prop)

    return make_learner


# Demo: boost decision-tree learners on carbool.csv.
# Uncomment one of the alternatives below to try another dataset.
data = Data_from_file('data/carbool.csv', target_index=-1)
#data = Data_from_file('data/SPECT.csv', target_index=0)
#data = Data_from_file('data/mail_reading.csv', target_index=-1)
#data = Data_from_file('data/holiday.csv', num_train=19, target_index=-1)
# Base learners are built by sp_DT_learner with min_prop=0.9.
learner9 = Boosting_learner(data, sp_DT_learner(0.9))
#learner7 = Boosting_learner(data, sp_DT_learner(0.7))
#learner5 = Boosting_learner(data, sp_DT_learner(0.5))
predictor9 = learner9.learn(10)
# Print the __doc__ of every entry in learner9.offsets.
for i in learner9.offsets:
    print(i.__doc__)
import matplotlib.pyplot as plt


def plot_boosting(data,
                  steps=10,
                  thresholds=[0.5, 0.1, 0.01, 0.001],
예제 #8
0
        error_example(predictor(example), dataset.target(example), to_optimize)
        for example in data_subset)
    return error


from learnProblem import Data_set, Data_from_file


def test(data):
    """Build and report a decision tree for every criterion/leaf pairing.

    For each criterion in Data_set.evaluation_criteria and each leaf
    selection in the module-level `selections`, a DT_learner is trained on
    data and the tree's __doc__ is printed. When data has a test set, the
    test error of that tree is printed for every evaluation criterion.
    """
    for criterion in Data_set.evaluation_criteria:
        for leaf in selections:
            learner = DT_learner(data, to_optimize=criterion,
                                 leaf_selection=leaf)
            tree = learner.learn()
            print("For", criterion, "using", leaf,
                  "at leaves, tree built is:", tree.__doc__)
            if not data.test:
                continue  # nothing to evaluate on
            for ecrit in Data_set.evaluation_criteria:
                print("    Average error for", ecrit, "using", leaf,
                      "at leaves is",
                      data.evaluate_dataset(data.test, tree, ecrit))


# When run as a script, test on one dataset; the commented lines are
# alternative datasets that can be enabled instead.
if __name__ == "__main__":
    #print("carbool.csv"); test(data = Data_from_file('data/carbool.csv', target_index=-1))
    # print("SPECT.csv"); test(data = Data_from_file('data/SPECT.csv', target_index=0))
    print("mail_reading.csv")
    test(data=Data_from_file('data/mail_reading.csv', target_index=-1))
    # print("holiday.csv"); test(data = Data_from_file('data/holiday.csv', num_train=19, target_index=-1))
예제 #9
0
def plot_fig_7_15():  # different runs produce different plots
    """Load SPECT.csv (target in column 0) and pass it to plot_error."""
    # alternative dataset:
    # Data_from_file('data/carbool.csv', target_index=-1)
    spect = Data_from_file('data/SPECT.csv', target_index=0)
    plot_error(spect)
예제 #10
0
def prod(L):
    """Return the product of the elements of L (1 when L is empty)."""
    product = 1
    for factor in L:
        product *= factor
    return product


def random_dist(k):
    """Return a list of k random non-negative numbers that sum to 1.

    Draws k values with random.random() and divides each by their total.
    """
    weights = [random.random() for _ in range(k)]
    total = sum(weights)
    return [w / total for w in weights]


# Demo: run EM with 2 classes on emdata2.csv for num_iter iterations,
# then show class 0.
data = Data_from_file('data/emdata2.csv', num_train=10, target_index=2000)
eml = EM_learner(data, 2)
num_iter = 2
print("Class assignment after", num_iter, "iterations:")
eml.learn(num_iter)
eml.show_class(0)

# Plot the error
# em2=EM_learner(data,2); em2.plot_error(40)   # 2 classes
# em3=EM_learner(data,3); em3.plot_error(40)   # 3 classes
# em13=EM_learner(data,13); em13.plot_error(40) # 13 classes

# data = Data_from_file('data/carbool.csv', target_index=2000,boolean_features=False)
# [f.frange for f in data.input_features]
# eml = EM_learner(data,3)
# eml.learn(20); eml.show_class(0)
        error_example(predictor(example), dataset.target(example), to_optimize)
        for example in data_subset)
    return error


from learnProblem import Data_set, Data_from_file


def test(data):
    """Build and report a decision tree for every criterion/leaf pairing.

    For each criterion in Data_set.evaluation_criteria and each of the leaf
    selections "mean" and "median", a DT_learner is trained on data and the
    tree's __doc__ is printed. When data has a test set, the test error of
    that tree is printed for every evaluation criterion.
    """
    leaf_choices = ("mean", "median")
    for criterion in Data_set.evaluation_criteria:
        for leaf in leaf_choices:
            learner = DT_learner(data, to_optimize=criterion,
                                 leaf_selection=leaf)
            tree = learner.learn()
            print("For", criterion, "using", leaf,
                  "at leaves, tree built is:", tree.__doc__)
            if not data.test:
                continue  # nothing to evaluate on
            for ecrit in Data_set.evaluation_criteria:
                print("    Average error for", ecrit, "using", leaf,
                      "at leaves is",
                      data.evaluate_dataset(data.test, tree, ecrit))


# When run as a script, test on one dataset; the commented lines are
# alternative datasets that can be enabled instead.
if __name__ == "__main__":
    # print("carbool.csv"); test(data = Data_from_file('data/carbool.csv', target_index=-1))
    # The printed label names the file actually loaded below (it previously
    # said "SPECT.csv" while loading pima.txt).
    print("pima.txt")
    # NOTE(review): target_index=0 was carried over from the SPECT.csv
    # call -- confirm it is the right target column for pima.txt.
    test(data=Data_from_file('data/pima.txt', target_index=0))
    # print("mail_reading.csv"); test(data = Data_from_file('data/mail_reading.csv', target_index=-1))
    # print("holiday.csv"); test(data = Data_from_file('data/holiday.csv', num_train=19, target_index=-1))
예제 #12
0
# Artificial Intelligence: Foundations of Computational Agents
# http://artint.info
# Copyright David L Poole and Alan K Mackworth 2017.
# This work is licensed under a Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International License.
# See: http://creativecommons.org/licenses/by-nc-sa/4.0/deed.en

from learnDT import DT_learner
from learnProblem import Data_set, Data_from_file


def test(data):
    """Build and report a decision tree for every criterion/leaf pairing.

    For each criterion in Data_set.evaluation_criteria and each of the leaf
    selections "mean" and "median", a DT_learner is trained on data and the
    tree's __doc__ is printed. When data has a test set, the test error of
    that tree is printed for every evaluation criterion.
    """
    leaf_choices = ("mean", "median")
    for criterion in Data_set.evaluation_criteria:
        for leaf in leaf_choices:
            learner = DT_learner(data, to_optimize=criterion,
                                 leaf_selection=leaf)
            tree = learner.learn()
            print("For", criterion, "using", leaf,
                  "at leaves, tree built is:", tree.__doc__)
            if not data.test:
                continue  # nothing to evaluate on
            for ecrit in Data_set.evaluation_criteria:
                print("    Average error for", ecrit, "using", leaf,
                      "at leaves is",
                      data.evaluate_dataset(data.test, tree, ecrit))


# When run as a script, test on pima.txt with column 8 as the target.
if __name__ == "__main__":
    test(data=Data_from_file('pima.txt', target_index=8))
예제 #13
0

def characteristic_error(target, prediction):
    """Return the per-position error of prediction against index target.

    Position i gets 1 - prediction[i] when i equals target, and
    -prediction[i] otherwise.
    """
    errors = []
    for position, value in enumerate(prediction):
        if target == position:
            errors.append(1 - value)
        else:
            errors.append(-value)
    return errors


def sum_squares_error(observed, predicted):
    """Returns the errors for each of the target features.

    Each error is the observed value minus the predicted value; pairing
    stops at the shorter of the two sequences.
    """
    errors = []
    for actual, estimate in zip(observed, predicted):
        errors.append(actual - estimate)
    return errors


# Demo: build a neural network on training.txt (last column is the target).
# Uncomment one of the alternatives below to try another dataset.
data = Data_from_file('data/training.txt', target_index=-1)
#data = Data_from_file('data/mail_reading_consis.csv', target_index=-1)
#data = Data_from_file('data/SPECT.csv',  prob_test=0.5, target_index=0)
# data = Data_from_file('data/holiday.csv', target_index=-1) #, num_train=19)
nn1 = NN(data)
# Layers: complete linear (50 outputs), sigmoid, complete linear
# (10 outputs), sigmoid.
nn1.add_layer(Linear_complete_layer(nn1, 50))
nn1.add_layer(Sigmoid_layer(nn1))  # comment this or the next
# nn1.add_layer(ReLU_layer(nn1))
nn1.add_layer(Linear_complete_layer(nn1, 10))
nn1.add_layer(Sigmoid_layer(nn1))
nn1.learning_rate = 0.1
# nn1.learn(100)

# Start timing just before the plot_steps call that follows.
start_time = time.perf_counter()
plot_steps(learner=nn1,
           data=data,