import numpy as np
from sklearn.svm import SVC

import tools


def compare_pla_svc(N, N_runs=1000):
    g_svm_isbetter = np.zeros(N_runs)
    n_support = np.zeros([N_runs, 2])
    for i in range(N_runs):
        # Generate N points in the range [-1, 1] x [-1, 1];
        # retry until both classes are present
        while True:
            X = tools.generate_points(N)
            # Choose a random line in 2D as a target function
            f = tools.choose_boundary()
            # Assign labels to X
            y = tools.evaluate_output(X, f)
            if abs(sum(y)) != y.shape[0]:
                break

        # Visualize data and target function
        #fig, ax = plt.subplots()
        #tools.visualize_points(X, y, ax)
        #tools.visualize_line(f, 'k', ax)

        # Fit the Perceptron Learning Algorithm
        g_pla = tools.perceptron_learning(X, y)
        # Visualize the learned PLA function
        #tools.visualize_line(g_pla, '--k', ax)

        # Generate out-of-sample data
        N_out = 10000
        X_out = tools.generate_points(N_out)
        y_out = tools.evaluate_output(X_out, f)

        # Evaluate E_out of PLA
        y_pla_pred = tools.evaluate_output(X_out, g_pla)
        E_out_pla = tools.cal_error(y_out, y_pla_pred)

        # Visualize out-of-sample data
        #tools.visualize_points(X_out, y_out, ax, 's')
        #tools.visualize_points(X_out, y_pla_pred, ax, 'x')

        # Linear Support Vector Classification; a large C approximates a hard margin
        svc = SVC(kernel='linear', C=1000)
        svc.fit(X, y)
        g_svc = np.concatenate([svc.intercept_, svc.coef_[0]])
        n_support[i, :] = svc.n_support_
        # Visualize the learned SVC function
        #tools.visualize_line(g_svc, ':k', ax)

        # Evaluate E_out of SVC
        y_svc_pred = tools.evaluate_output(X_out, g_svc)
        E_out_svc = tools.cal_error(y_out, y_svc_pred)

        g_svm_isbetter[i] = (E_out_svc < E_out_pla)
    return g_svm_isbetter.mean(), n_support.sum(axis=1).mean()
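# tools.perceptron_learning is not shown in this snippet. Below is a minimal
# PLA sketch consistent with the weight layout used above ([bias, w1, w2],
# inferred from how g_svc concatenates svc.intercept_ and svc.coef_); it is
# an assumption, not necessarily tools' exact implementation.
import numpy as np

def perceptron_learning(X, y, max_iter=10000):
    # Augment inputs with a constant 1 for the bias term
    X_aug = np.hstack([np.ones((X.shape[0], 1)), X])
    w = np.zeros(X_aug.shape[1])
    for _ in range(max_iter):
        preds = np.sign(X_aug @ w)
        misclassified = np.where(preds != y)[0]
        if len(misclassified) == 0:
            break
        # Update on a randomly chosen misclassified point
        j = np.random.choice(misclassified)
        w = w + y[j] * X_aug[j]
    return w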
import matplotlib.pyplot as plt
import seaborn as sns

import tools


def logistic_regression_one_run(eta=0.01, visualize=False):
    X = tools.generate_points(100)
    f = tools.choose_boundary()
    y = tools.evaluate_output(X, f)

    # Fit logistic regression using stochastic gradient descent
    w, num_iter = tools.logistic_regression(X, y, eta)

    # Estimate E_out by generating a separate set of points to evaluate the error
    X_out = tools.generate_points(1000)
    y_out = tools.evaluate_output(X_out, f)
    E_out = tools.error_measure_log(X_out, y_out, w)

    if visualize:
        fig, ax = plt.subplots()
        tools.visualize_points(X, y, ax)
        tools.visualize_line(f, '-k', ax)
        tools.visualize_line(w, '--k', ax)
        plt.grid(False)
        sns.despine(bottom=True, left=True)
        plt.show()
    return E_out, num_iter
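# tools.logistic_regression is also not shown. One plausible SGD sketch,
# minimizing the cross-entropy error and stopping when the weight vector
# moves less than `tol` over a full epoch; tol and max_epochs are assumed
# parameters, not taken from the original source.
import numpy as np

def logistic_regression(X, y, eta=0.01, tol=0.01, max_epochs=10000):
    X_aug = np.hstack([np.ones((X.shape[0], 1)), X])
    N = X_aug.shape[0]
    w = np.zeros(X_aug.shape[1])
    for epoch in range(1, max_epochs + 1):
        w_prev = w.copy()
        # One epoch: visit the points in a random order
        for j in np.random.permutation(N):
            # Gradient of ln(1 + exp(-y * w.x)) at a single point
            grad = -y[j] * X_aug[j] / (1 + np.exp(y[j] * np.dot(w, X_aug[j])))
            w = w - eta * grad
        if np.linalg.norm(w - w_prev) < tol:
            break
    return w, epoch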
import matplotlib.pyplot as plt
plt.style.use("seaborn")
import numpy as np
from math import pi


# Target function
def f(X):
    return np.sign(X[:, 1] - X[:, 0] + 0.25 * np.sin(pi * X[:, 0]))


# generate_points and RBF_normal are assumed to be defined earlier in the
# notebook (see the RBF_normal sketch below)
Nruns = 10000
Ein_eq_0 = 0
for i in range(Nruns):
    X = generate_points()
    # Evaluate outputs: f(x) = sign(x2 - x1 + 0.25 * sin(pi * x1))
    y = f(X)

    # RBF-normal (clustering -> Gaussian RBF -> linear regression)
    n_clusters = 9
    normal = RBF_normal(K=n_clusters)
    normal.fit(X, y)

    # Evaluate E_in; count runs where the regular RBF model fits the
    # data perfectly (E_in == 0, up to numerical tolerance)
    E_in = 1 - normal.score(X, y)
    if E_in < 1e-3:
        Ein_eq_0 += 1

print("Fraction of runs with E_in = 0:", Ein_eq_0 / Nruns)
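# RBF_normal is referenced but not defined in this snippet. A minimal sketch
# of the pipeline named in the comment above (K-means centers -> Gaussian RBF
# features -> linear regression); the gamma parameter and the exact API are
# assumptions, only the pipeline itself comes from the original comments.
import numpy as np
from sklearn.cluster import KMeans

class RBF_normal:
    def __init__(self, K, gamma=1.5):
        self.K = K
        self.gamma = gamma

    def _features(self, X):
        # Phi[i, k] = exp(-gamma * ||x_i - mu_k||^2), plus a bias column
        d2 = ((X[:, None, :] - self.centers[None, :, :]) ** 2).sum(axis=2)
        Phi = np.exp(-self.gamma * d2)
        return np.hstack([np.ones((X.shape[0], 1)), Phi])

    def fit(self, X, y):
        self.centers = KMeans(n_clusters=self.K, n_init=10).fit(X).cluster_centers_
        # Linear regression on the RBF features via the pseudo-inverse
        self.w = np.linalg.pinv(self._features(X)) @ y
        return self

    def predict(self, X):
        return np.sign(self._features(X) @ self.w)

    def score(self, X, y):
        # Classification accuracy, so E_in = 1 - score
        return np.mean(self.predict(X) == y)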
import tools

# Hand-picked points plus randomly generated ones; data must be a list
# so that the generated points can be appended to it
data = [[20, 2], [40, 4], [80, 8], [30, 2.5], [70, 5], [80, 6]]
data += tools.generate_points(100)

# Compare the MSE of polynomial fits of degree 1 through 7
for i in range(1, 8):
    print("degree " + str(i) + " -> " + str(tools.MSE(data, tools.poly_numpy(data, i))))

tools.plot_multi_poly(data, [tools.poly_numpy(data, i) for i in range(1, 7)])
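# The tools module used here is not shown. A plausible sketch of poly_numpy
# and MSE built on np.polyfit/np.polyval, assuming `data` is a sequence of
# [x, y] pairs; the real tools implementation may differ.
import numpy as np

def poly_numpy(data, degree):
    # Least-squares polynomial fit; returns coefficients, highest degree first
    x, y = np.asarray(data, dtype=float).T
    return np.polyfit(x, y, degree)

def MSE(data, coeffs):
    # Mean squared error of the fitted polynomial on the data
    x, y = np.asarray(data, dtype=float).T
    return np.mean((np.polyval(coeffs, x) - y) ** 2)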
from math import pi

import numpy as np
import pandas as pd


# Target function
def f(X):
    return np.sign(X[:, 1] - X[:, 0] + 0.25 * np.sin(pi * X[:, 0]))


Nruns = 1000
Ein_trend = []
Eout_trend = []
for i in range(Nruns):
    X = generate_points()
    # Evaluate outputs: f(x) = sign(x2 - x1 + 0.25 * sin(pi * x1))
    y = f(X)

    # RBF-normal (clustering -> Gaussian RBF -> linear regression)
    # with K = 9 versus K = 12 clusters
    n_clusters_1 = 9
    n_clusters_2 = 12
    normal_1 = RBF_normal(K=n_clusters_1)
    normal_2 = RBF_normal(K=n_clusters_2)
    normal_1.fit(X, y)
    normal_2.fit(X, y)

    # Evaluate E_in
    E_in_1 = 1 - normal_1.score(X, y)
    E_in_2 = 1 - normal_2.score(X, y)

    # Evaluate E_out on fresh out-of-sample data
    X_out = generate_points()
    y_out = f(X_out)
    E_out_1 = 1 - normal_1.score(X_out, y_out)
    E_out_2 = 1 - normal_2.score(X_out, y_out)

    # Plausible completion of the truncated cell: record whether each error
    # goes down when K increases from 9 to 12
    Ein_trend.append(E_in_2 < E_in_1)
    Eout_trend.append(E_out_2 < E_out_1)
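# The RBF experiments call generate_points() with no arguments. A minimal
# sketch consistent with the [-1, 1] x [-1, 1] range stated in
# compare_pla_svc; the default N is an assumption.
import numpy as np

def generate_points(N=100):
    # N points drawn uniformly from the square [-1, 1] x [-1, 1]
    return np.random.uniform(-1, 1, size=(N, 2))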