Example #1
def rbf_answers(k=9, gamma=1.5):
    num_experiments = 500
    num_points = 100
    bounds = [-1, 1, -1, 1]
    
    num_kernel_beat_reg = 0.0
    not_linsep = 0.0
    num_rbf_linsep = 0.0
    sum_E_in_svm = 0.0
    sum_E_out_svm = 0.0
    sum_E_in_rbf = 0.0
    sum_E_out_rbf = 0.0
    for experiment in range(num_experiments):
        # print(experiment + 1, "/", num_experiments)
        X = random_plane_points(num_points, bounds)
        y = target_fn(X)
        # A very large C approximates a hard-margin SVM with an RBF kernel
        svm_clf = svm.SVC(kernel='rbf', gamma=gamma, C=10**10)
        svm_clf.fit(X, y)
        rbf_clf = rbfClassifier(k, gamma, bounds, bias=True)
        rbf_clf.fit(X, y)
        E_in_svm = np.where(svm_clf.predict(X) != y, 1.0, 0.0).mean()
        E_in_rbf = np.where(rbf_clf.predict(X) != y, 1.0, 0.0).mean()
        X_test = random_plane_points(10000, bounds)
        y_test = target_fn(X_test)
        E_out_svm = np.where(svm_clf.predict(X_test) != y_test, 1.0, 0.0).mean()
        E_out_rbf = np.where(rbf_clf.predict(X_test) != y_test, 1.0, 0.0).mean()

        # Statistics
        if E_in_svm != 0:    # Hard-margin SVM failed to separate the data; discard this run
            not_linsep += 1
            continue
        if E_in_rbf == 0:
            num_rbf_linsep += 1
        if E_out_svm < E_out_rbf:
            num_kernel_beat_reg += 1
        sum_E_in_svm += E_in_svm
        sum_E_out_svm += E_out_svm
        sum_E_in_rbf += E_in_rbf
        sum_E_out_rbf += E_out_rbf

    good_runs = num_experiments - not_linsep
    print("k =", k, "gamma =", gamma)
    print("Not linearly separable:     ", not_linsep/num_experiments)
    print("Kernel beats regular:       ", num_kernel_beat_reg/good_runs)
    print("Regular linearly separable: ", num_rbf_linsep/good_runs)
    print("Average value of E_in_svm:  ", sum_E_in_svm/good_runs)
    print("Average value of E_out_svm: ", sum_E_out_svm/good_runs)
    print("Average value of E_in_rbf:  ", sum_E_in_rbf/good_runs)
    print("Average value of E_out_rbf: ", sum_E_out_rbf/good_runs)
Example #2
def test_random_gens():
    for dimension in range(2, 5):
        bounds = datagen.unit_bounds(dimension)
        random_points = datagen.random_plane_points(4, bounds)
        assert np.array_equal(random_points.shape, (4, dimension))
        
        random_line = datagen.random_hyperplane(bounds)
        assert len(random_line) == dimension + 1
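datagen.unit_bounds is not shown either; a plausible sketch, assuming it repeats the [-1, 1] interval once per dimension in the same flat layout used above:

def unit_bounds(dimension):
    # Hypothetical helper: [-1, 1] in every dimension, flat [min, max, ...] layout.
    return [-1, 1] * dimension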
Example #3
def test_all_dichotomies(num_points, max_iter, dichotomies, pla):
    X = random_plane_points(num_points, pla.bounds)
    passed = True
    for i, dichotomy in enumerate(dichotomies):
        pla.fit(X, dichotomy, maxiter=max_iter)
        if pla.num_iters == max_iter:
            # print(dichotomy, np.sign(np.dot(pla.X, pla.weights)), pla.num_iters)
            passed = False
            break
    return passed
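The dichotomies argument is expected to hold every possible ±1 labeling of the points; one way a caller might enumerate them (a sketch, not taken from the source):

import itertools
import numpy as np

num_points = 3
# All 2^num_points dichotomies, one row of +1/-1 labels per labeling.
dichotomies = np.array(list(itertools.product([-1.0, 1.0], repeat=num_points)))
print(dichotomies.shape)  # (8, 3)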
Example #4
def cluster_centers(X, k, bounds):
    """Returns mu for k-means on X."""

    mu = random_plane_points(k, bounds)
    kmeans = sklearn.cluster.KMeans(n_clusters=k, init=mu, n_init=1)
    kmeans.fit(X)

    # Restart if a mean has no associated points
    if len(set(kmeans.labels_)) < k:
        return cluster_centers(X, k, bounds)

    return kmeans.cluster_centers_
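A hedged usage sketch, assuming scikit-learn and the random_plane_points helper sketched under Example #1; the recursive call simply restarts k-means from fresh random centers whenever a cluster ends up empty:

import numpy as np
import sklearn.cluster

bounds = [-1, 1, -1, 1]
X = random_plane_points(100, bounds)        # assumed helper (see Example #1 sketch)
mu = cluster_centers(X, k=9, bounds=bounds)
print(mu.shape)  # (9, 2): one center per cluster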
Example #5
def cross_entropy_randomized_Eout(f, g, bounds, N=10000):

    """Computes cross entropy out of sample error for linear model.

    The cross entropy error is similar to the linear_Eout error, but
    instead of simply taking the proportion of wrong labels, we use cross
    entropy to get an error.

    Args:
        f, g: two lines defined by weights (with bias term)
        bounds: bounds used to generate random points
        N: number of points to generate

    Returns:
        Cross entropy error of g measured against the labels produced by f.
    """

    X = np.hstack([np.ones((N, 1)), random_plane_points(N, bounds)])
    labels_f = np.sign(np.dot(X, f))
    return cross_entropy_error(X, labels_f, g)
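cross_entropy_error is not included in this listing; assuming the standard logistic cross-entropy error E(w) = (1/N) * sum(ln(1 + exp(-y_n * w·x_n))), a sketch could be:

import numpy as np

def cross_entropy_error(X, y, w):
    # Assumed form: mean logistic cross-entropy over the labeled points.
    return np.mean(np.log(1.0 + np.exp(-y * np.dot(X, w))))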
Example #6
def linear_randomized_Eout(f, g, bounds, N=10000):

    """Computes out of sample error for two linear weights.

    This function compute the out of sample error E_out by generating
    N random points in the given bounds, and finding the percentage difference
    in labels from the two lines defined by f and g.

    Args:
        f, g: two lines defined by weights (with bias term)
        bounds: bounds used to generate random points
        N: number of points to generate

    Returns:
        Proportion of generated points that f and g label differently.
    """

    X = np.hstack([np.ones((N, 1)), random_plane_points(N, bounds)])
    labels_f = np.sign(np.dot(X, f))
    labels_g = np.sign(np.dot(X, g))
    return np.where(labels_f == labels_g, 0.0, 1.0).mean()
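A hedged usage sketch with two hypothetical weight vectors (bias term first), assuming the helper sketches above:

import numpy as np

f = np.array([0.1, 1.0, -1.0])   # hypothetical target weights
g = np.array([0.0, 0.9, -1.1])   # hypothetical hypothesis weights
bounds = [-1, 1, -1, 1]
print(linear_randomized_Eout(f, g, bounds))         # fraction of label disagreements
print(cross_entropy_randomized_Eout(f, g, bounds))  # cross entropy against f's labels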