Example #1
def plot_svm():
    # `bounds`, `np`, and `plt` are assumed to be defined at module scope
    num_points = 10
    (X, y, f) = random_linearly_separable_data(num_points, bounds)
    (w_svm, vectors) = linsep_svm(X, y)

    x_plot = X[:, 0]
    y_plot = X[:, 1]
    # draw the support vectors with larger markers
    s = 20 * np.ones(num_points)
    s[vectors] = 60

    pla = PLA(bounds=bounds)
    pla.fit(X, y)
    w_pla = pla.weights

    # sanity-check the bias: the averaged b should agree with the b
    # implied by each individual support vector
    print("Average b:", w_svm[0])
    each_b = y[vectors] - np.dot(X[vectors], w_svm[1:])
    print("Individual b's:", each_b)

    (x_f, y_f) = weights_to_mxb_2D(f, bounds)
    (x_w, y_w) = weights_to_mxb_2D(w_svm, bounds)
    (x_p, y_p) = weights_to_mxb_2D(w_pla, bounds)
    c = np.where(y == 1, 'r', 'b')
    plt.scatter(x_plot, y_plot, s, c=c)
    plt.plot(x_f, y_f, 'k-.')   # target function f
    plt.plot(x_w, y_w, 'b')     # SVM hypothesis
    plt.plot(x_p, y_p, 'r--')   # PLA hypothesis
    plt.xlim([-1, 1])
    plt.ylim([-1, 1])
    plt.grid()
    plt.show()
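The helper `weights_to_mxb_2D` is not shown in any of these examples, but Example #4 below inlines the same computation, so a minimal sketch can be reconstructed from it. Assumptions: the weight layout is [b, w1, w2] for the line b + w1*x + w2*y = 0, and bounds is a flat [x_min, x_max, y_min, y_max] array.

import numpy as np

def weights_to_mxb_2D(w, bounds):
    # Hypothetical reconstruction of the helper used above: turn a weight
    # vector [b, w1, w2] into a pair of endpoints spanning the x-range,
    # ready for plt.plot.  Assumes bounds = [x_min, x_max, y_min, y_max].
    x = np.array(bounds[:2], dtype=float)
    y = -(w[1] * x + w[0]) / w[2]   # solve b + w1*x + w2*y = 0 for y
    return (x, y)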
Example #2
def test_linearly_separable():
    """TODO: maybe stick a PLA solver in here too?"""
    num_points = 100
    for dimension in range(2, 5):
        bounds = datagen.unit_bounds(dimension)
        (X, y, weights) = datagen.random_linearly_separable_data(num_points, bounds)
        # prepend a column of ones so the bias term in `weights` applies
        X_in = np.column_stack([np.ones((num_points, 1)), X])
        # every generated label must agree with the sign of the target function
        assert np.all(y == np.sign(np.dot(X_in, weights)))
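The generator `random_linearly_separable_data` itself is not shown. A minimal sketch of what it plausibly does, assuming a flat [min_1, max_1, min_2, max_2, ...] bounds layout (inferred from Example #4) and a target drawn as random weights; the real implementation may choose the hyperplane differently:

import numpy as np

def random_linearly_separable_data(num_points, bounds):
    # Hypothetical sketch: draw a random target hyperplane, then draw
    # points uniformly inside `bounds` and label them by which side of
    # the hyperplane they fall on.
    dim = len(bounds) // 2
    lows, highs = np.array(bounds[0::2]), np.array(bounds[1::2])
    f = np.random.uniform(-1, 1, dim + 1)   # random [b, w_1, ..., w_d]
    X = np.random.uniform(lows, highs, (num_points, dim))
    y = np.sign(np.dot(np.column_stack([np.ones(num_points), X]), f))
    y[y == 0] = 1   # treat points exactly on the boundary as positive
    return (X, y, f)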
Example #3
def linsep_logistic(num_points=100, num_experiments=100, tol=0.01, eta=0.01, max_iter=1000):
    # `bounds` is assumed to be defined at module scope
    all_epochs = np.zeros(num_experiments)
    all_E_out = np.zeros(num_experiments)

    # run repeated experiments and average epochs-to-converge and E_out
    for i in range(num_experiments):
        (X, y, f) = random_linearly_separable_data(num_points, bounds)
        (weights, num_epochs) = logistic_gradient_descent(X, y, tol, eta, max_iter)
        E_out = cross_entropy_randomized_Eout(f, weights, bounds)
        all_epochs[i] = num_epochs
        all_E_out[i] = E_out
        print(i + 1, num_epochs, E_out)

    return all_epochs.mean(), all_E_out.mean()
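The solver `logistic_gradient_descent` is also external. A minimal sketch, assuming batch gradient descent on the cross-entropy error E_in(w) = (1/N) * sum_n ln(1 + exp(-y_n * w . x_n)) with a stopping rule on the size of the update; the actual stopping criterion and weight layout are assumptions:

import numpy as np

def logistic_gradient_descent(X, y, tol=0.01, eta=0.01, max_iter=1000):
    # Hypothetical sketch: full-batch gradient descent on cross-entropy,
    # stopping once the weight update is smaller than `tol`.
    N = X.shape[0]
    Xb = np.column_stack([np.ones(N), X])   # prepend the bias column
    w = np.zeros(Xb.shape[1])
    for epoch in range(1, max_iter + 1):
        # gradient of E_in: -(1/N) * sum_n y_n x_n / (1 + exp(y_n w . x_n))
        grad = -np.mean((y / (1 + np.exp(y * np.dot(Xb, w))))[:, None] * Xb, axis=0)
        step = -eta * grad
        w += step
        if np.linalg.norm(step) < tol:
            break
    return (w, epoch)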
Example #4
def test(num_points=100, tol=0.01, eta=0.01, max_iter=2000):
    # `bounds` is assumed to be defined at module scope
    (X, y, f) = random_linearly_separable_data(num_points, bounds)
    (w, num) = logistic_gradient_descent(X, y, tol, eta, max_iter)

    print(num, cross_entropy_randomized_Eout(f, w, bounds))

    # split the points by label for coloring
    positives = X[np.where(y == 1)]
    negatives = X[np.where(y == -1)]

    x_p = positives[:, 0]
    y_p = positives[:, 1]
    x_n = negatives[:, 0]
    y_n = negatives[:, 1]
    # convert weights [b, w1, w2] into line endpoints spanning the x-range:
    # solve b + w1*x + w2*y = 0 for y at x = x_min and x = x_max
    (w_x, w_y) = (bounds[:2], -np.array(bounds[:2]) * w[1] / w[2] - w[0] / w[2])
    (f_x, f_y) = (bounds[:2], -np.array(bounds[:2]) * f[1] / f[2] - f[0] / f[2])
    plt.plot(w_x, w_y, c='b')   # learned hypothesis
    plt.plot(f_x, f_y, c='k')   # target function
    plt.scatter(x_p, y_p, c='b', marker='o')
    plt.scatter(x_n, y_n, c='r', marker='o')
    plt.xlim(bounds[:2])
    plt.ylim(bounds[2:4])
    plt.show()
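The error estimator `cross_entropy_randomized_Eout` is not shown either. Its name suggests a Monte Carlo estimate of the out-of-sample cross-entropy; a minimal sketch under that assumption, with the sample count as a made-up default:

import numpy as np

def cross_entropy_randomized_Eout(f, w, bounds, num_samples=10000):
    # Hypothetical sketch: sample fresh points uniformly inside `bounds`,
    # label them with the target f, and average ln(1 + exp(-y * w . x)).
    dim = len(bounds) // 2
    lows, highs = np.array(bounds[0::2]), np.array(bounds[1::2])
    X = np.random.uniform(lows, highs, (num_samples, dim))
    Xb = np.column_stack([np.ones(num_samples), X])
    y = np.sign(np.dot(Xb, f))
    y[y == 0] = 1
    return np.mean(np.log(1 + np.exp(-y * np.dot(Xb, w))))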
Example #5
def answers():
    # `bounds` is assumed to be defined at module scope
    num_points = 10
    num_experiments = 1000
    times_svm_better = 0
    total_num_vectors = 0

    for i in range(num_experiments):
        (X, y, f) = random_linearly_separable_data(num_points, bounds)
        (w_svm, vectors) = linsep_svm(X, y)

        pla = PLA(bounds=bounds)
        pla.fit(X, y)
        w_pla = pla.weights

        # estimate the out-of-sample error of each hypothesis against the target f
        E_svm = linear_randomized_Eout(f, w_svm, bounds)
        E_pla = linear_randomized_Eout(f, w_pla, bounds)
        if E_svm < E_pla:
            times_svm_better += 1
        total_num_vectors += len(vectors)

    print("Number of points used:            ", num_points)
    print("Proportion of times SVM beats PLA:", times_svm_better / num_experiments)
    print("Average number of support vectors:", total_num_vectors / num_experiments)