def run_linear_regression(N_samples, N_points): '''runs on N_samples and with N_points a linear regression computes Ein by average of the samples as well as Eout ''' print 'running Linear Regression on %s samples' % str(N_samples) print 'Each sample has %s data points' % str(N_points) Ein_avg = [] Eout_avg = [] for i in range(N_samples): d = data(N_points) l = randomline() f = target_function(l) t_set = build_training_set(d, f) wlin, X, y = linear_regression(N_points, t_set) Ein = compute_Ein(wlin, X, y) Ein_avg.append(Ein) Eout = compute_Eout(wlin, f, N_points) Eout_avg.append(Eout) print_avg('Ein', Ein_avg) print_avg('Eout', Eout_avg)
def run_linear_regression(N_samples,N_points):
    '''runs on N_samples and with N_points a linear regression
    computes Ein by average of the samples as well as Eout
    '''
    print 'running Linear Regression on %s samples' %str(N_samples)
    print 'Each sample has %s data points' %str(N_points)

    Ein_avg = []   # in-sample error collected for each sample
    Eout_avg = []  # out-of-sample error collected for each sample

    for i in range(N_samples):
        # fresh random dataset and random target function per sample
        d = data(N_points)
        l = randomline()
        f = target_function(l)
        t_set = build_training_set(d,f)
        # wlin: fitted weight vector; X, y: design matrix and labels
        wlin,X,y = linear_regression(N_points,t_set)
        Ein = compute_Ein(wlin,X,y)
        Ein_avg.append(Ein)
        # Eout estimated on fresh points against the true target f
        Eout = compute_Eout(wlin,f,N_points)
        Eout_avg.append(Eout)

    print_avg('Ein',Ein_avg)
    print_avg('Eout',Eout_avg)
def run_lr_and_pla(N_samples, N_points): '''runs on N_samples and with N_points a linear regresion then from the weight vector runs PLA algorithm compute the average number of iterations of PLA with this w vector ''' print 'running Linear Regression on %s samples' % N_samples print 'Each samples has %s data points' % N_points iteration_avg = [] for i in range(N_samples): d = data(N_points) l = randomline() f = target_function(l) t_set = build_training_set(d, f) wlin, X, y = linear_regression(N_points, t_set) w_pla, iteration = PLA(N_points, wlin, f, t_set) iteration_avg.append(iteration) print_avg('Number of iterations', iteration_avg)
def run_lr_and_pla(N_samples, N_points):
    '''runs on N_samples and with N_points a linear regresion
    then from the weight vector runs PLA algorithm
    compute the average number of iterations of PLA with this w vector
    '''
    print 'running Linear Regression on %s samples' %N_samples
    print 'Each samples has %s data points' %N_points

    iteration_avg = []  # PLA iteration count for each sample

    for i in range(N_samples):
        # fresh random dataset and random target function per sample
        d = data(N_points)
        l = randomline()
        f = target_function(l)
        t_set = build_training_set(d,f)
        # linear-regression weights are used as the PLA starting point
        wlin,X,y = linear_regression(N_points,t_set)
        w_pla,iteration = PLA(N_points,wlin,f,t_set)
        iteration_avg.append(iteration)

    print_avg('Number of iterations',iteration_avg)
def run_PLA(N_samples, N_points): samples = [] # vector of 1 clasified, 0 misclassified iterations = [] #vector of iterations needed for each PLA b_misclassified = False diff = [] #vector of difference average between f and g for i in range(N_samples): # run PLA in sample d = data(N_points) l = randomline() f = target_function(l) t_set = build_training_set(d, f) w = [0, 0, 0] #Start the PLA with the weight vector w being all zeros w, iteration = PLA(N_points, w, f, t_set) iterations.append(iteration) # check if points are classified or not for i in range(len(t_set)): point = t_set[i][0] s = h(w, point) yn = t_set[i][1] if yn != s: samples.append(0) b_misclassified = True break # check difference between f and g diff.append(evaluate_diff_f_g(f, w)) if not b_misclassified: samples.append(1) b_misclassified = False print 'number of samples misclassified: %s ' % samples.count(0) print 'number of classified samples: %s ' % samples.count(1) print 'number of iteration avg: %s ' % (str( sum(iterations) / len(iterations) * 1.0)) print 'average of difference in function g: %s' % (sum(diff) / (len(diff) * 1.0))
def run_PLA(N_samples,N_points): samples = []# vector of 1 clasified, 0 misclassified iterations = []#vector of iterations needed for each PLA b_misclassified = False diff = []#vector of difference average between f and g for i in range(N_samples): # run PLA in sample d = data(N_points) l = randomline() f = target_function(l) t_set = build_training_set(d,f) w = [0,0,0] w,iteration = PLA(N_points,w,f,t_set) iterations.append(iteration) # check if points are classified or not for i in range(len(t_set)): point = t_set[i][0] s = h(w,point) yn = t_set[i][1] if yn != s: samples.append(0) b_misclassified = True break # check difference between f and g diff.append(evaluate_diff_f_g(f,w)) if not b_misclassified: samples.append(1) b_misclassified = False print 'number of samples misclassified: %s ' % samples.count(0) print 'number of classified samples: %s ' % samples.count(1) print 'number of iteration avg: %s ' % (str(sum(iterations)/len(iterations)*1.0)) print 'average of difference in function g: %s' % ( sum(diff)/(len(diff)*1.0) )
def run_pla_vs_svm(nbruns=1, N=10): solvers.options['show_progress'] = False d = [] l = 0 f = 0 t_set = [] y = [] svm_vs_pla = [] for i in range(nbruns): onBothSides = False while (not onBothSides): d = data(N) l = randomline() f = target_function(l) t_set = build_training_set(d, f) y = target_vector(t_set) if (1 in y) and (-1 in y): onBothSides = True else: onBothSides = False w = [0, 0, 0] w_pla, iteration = PLA(N, w, f, t_set) plaEout = evaluate_diff_f_g(f, w_pla) X_matrix = input_data_matrix(t_set) dimension = len(X_matrix[0]) #identity matrix of size dim X dim matrix x,I,J,typecode double P = spmatrix(1, range(dimension), range(dimension), tc='d') #vector of zeros of size dim, typecode double q = matrix([0] * (dimension), tc='d') mat = [] for t in t_set: y = t[1] temp = [x * -1.0 * y for x in t[0]] mat.append(temp) G = matrix(mat, tc='d') G = G.trans() # vectors of -1 of size t_set h = matrix([-1] * len(t_set), tc='d') #http://abel.ee.ucla.edu/cvxopt/examples/tutorial/qp.html qp_sln = solvers.qp(P, q, G, h) wsvm = list(qp_sln['x']) # number of support vectors you can get at each run count_sv = 0 for t in t_set: wsvm = array(wsvm) x = array(t[0]) y = t[1] res = fabs(y * dot(wsvm, x) - 1) if res < 0.001: count_sv = count_sv + 1 #print count_sv # Eout of svm svmEout = computeEout_svm(f, wsvm) #print 'svmEout: %s'%svmEout if (svmEout < plaEout): svm_vs_pla.append([True, count_sv]) else: svm_vs_pla.append([False, count_sv]) print "svm win pla %f" % (len(filter(lambda a: a[0] is True, svm_vs_pla)) * 1.0 / N) percent_svm_won = len([r[0] for r in svm_vs_pla if r[0] is True ]) * 1.0 / len(svm_vs_pla) print "question 9: svm beat pla %f percent of the time" % ( percent_svm_won * 100) avg_sv = sum([a[1] for a in svm_vs_pla]) * 1.0 / len(svm_vs_pla) print "avg sv:", avg_sv
def run_pla_vs_svm(nbruns = 1, N = 10):
    '''Compares PLA with a hard-margin SVM (cvxopt QP) over nbruns runs of
    N training points each; prints win statistics and support-vector counts.
    '''
    solvers.options['show_progress'] = False
    d = []
    l = 0
    f = 0
    t_set = []
    y = []
    svm_vs_pla = []  # per run: [True if SVM beat PLA on Eout, support-vector count]

    for i in range(nbruns):
        # resample until the training set contains both labels
        onBothSides = False
        while(not onBothSides):
            d = data(N)
            l = randomline()
            f = target_function(l)
            t_set = build_training_set(d,f)
            y = target_vector(t_set)
            if (1 in y) and (-1 in y):
                onBothSides = True
            else:
                onBothSides = False

        # PLA baseline from the all-zero weight vector
        w = [0,0,0]
        w_pla,iteration = PLA(N,w,f,t_set)
        plaEout = evaluate_diff_f_g(f,w_pla)

        X_matrix = input_data_matrix(t_set)
        dimension = len(X_matrix[0])
        #identity matrix of size dim X dim matrix x,I,J,typecode double
        P = spmatrix(1, range(dimension), range(dimension), tc='d')
        #vector of zeros of size dim, typecode double
        q = matrix([0]*(dimension), tc='d')
        # rows of -y_n * x_n; with h = -1 below this encodes y_n*(w.x_n) >= 1
        mat = []
        for t in t_set:
            y = t[1]
            temp = [x * -1.0*y for x in t[0]]
            mat.append(temp)
        G = matrix(mat, tc='d')
        G = G.trans()
        # vectors of -1 of size t_set
        h = matrix([-1]*len(t_set), tc='d')
        #http://abel.ee.ucla.edu/cvxopt/examples/tutorial/qp.html
        qp_sln = solvers.qp(P, q, G, h)
        wsvm = list(qp_sln['x'])

        # number of support vectors you can get at each run
        # (points with y*(w.x) within 0.001 of the margin value 1)
        count_sv = 0
        for t in t_set:
            wsvm = array(wsvm)
            x = array(t[0])
            y = t[1]
            res = fabs(y*dot(wsvm,x)-1)
            if res < 0.001:
                count_sv = count_sv + 1
        #print count_sv

        # Eout of svm
        svmEout = computeEout_svm(f,wsvm)
        #print 'svmEout: %s'%svmEout
        if(svmEout < plaEout):
            svm_vs_pla.append([True,count_sv])
        else:
            svm_vs_pla.append([False,count_sv])

    # NOTE(review): the line below divides the win count by N (points per
    # run), not by the number of runs; the percentage on the next line uses
    # len(svm_vs_pla) — looks like the intended denominator. Confirm.
    print "svm win pla %f" % (len(filter(lambda a: a[0] is True, svm_vs_pla))*1.0/N)
    percent_svm_won = len([r[0] for r in svm_vs_pla if r[0] is True])*1.0/len(svm_vs_pla)
    print "question 9: svm beat pla %f percent of the time" % (percent_svm_won*100)
    avg_sv = sum([a[1] for a in svm_vs_pla])*1.0/len(svm_vs_pla)
    print "avg sv:", avg_sv