def compute_Eout_from_data(w,t_set_out,N_points):
    """Return the fraction of points in t_set_out that w misclassifies.

    The inputs and targets are pulled out of t_set_out with
    input_data_matrix() and target_vector().  Each score dot(X, w) is
    replaced by its sign() and compared with the matching target.
    N_points is accepted for the callers' sake but is not used here.
    """
    inputs = input_data_matrix(t_set_out)
    targets = target_vector(t_set_out)
    predictions = dot(inputs, w)
    # Turn the raw scores into class predictions, in place.
    for idx, score in enumerate(predictions):
        predictions[idx] = sign(score)
    residuals = predictions - targets
    # A nonzero residual marks a prediction that differs from its target.
    n_wrong = sum(1 for residual in residuals if residual != 0)
    return n_wrong / float(len(residuals))
def compute_Eout_from_data(w, t_set_out, N_points):
    """Misclassified out-of-sample points / total out-of-sample points.

    Works on already available data: t_set_out is split into an input
    matrix and a target vector, the hypothesis sign(dot(X, w)) is evaluated
    on every row, and the share of rows whose prediction differs from the
    target is returned.  N_points is not read by this function.
    """
    X_out = input_data_matrix(t_set_out)
    y_out = target_vector(t_set_out)

    hypothesis = dot(X_out, w)
    for position, raw_value in enumerate(hypothesis):
        # overwrite each score with its sign
        hypothesis[position] = sign(raw_value)

    mismatch = hypothesis - y_out
    errors = 0
    for delta in mismatch:
        if delta != 0:
            errors += 1
    return errors / (1.0 * len(mismatch))
def compute_Eout_nonlineartrans(wlin, outdata):
    """Return the out-of-sample classification error of wlin on outdata.

    outdata is first passed through transform_t_set(); the input matrix and
    target vector are then extracted from the transformed set.  The
    hypothesis is sign(dot(X, wlin)), and the result is the number of
    points whose prediction differs from the target divided by the number
    of points.

    Raises ZeroDivisionError when the transformed set is empty.
    """
    t_set_trans = transform_t_set(outdata)
    X_matrix = input_data_matrix(t_set_trans)
    y_vector = target_vector(t_set_trans)

    g_vector = dot(X_matrix, wlin)
    # Replace every raw score by its sign, in place.
    for i, score in enumerate(g_vector):
        g_vector[i] = sign(score)

    # Nonzero entries are the misclassified points.
    vEout = g_vector - y_vector
    nEout = sum(1 for diff in vEout if diff != 0)
    return nEout / float(len(vEout))
def compute_Eout_nonlineartrans(wlin, outdata):
    """Fraction of the points in outdata misclassified by wlin.

    Steps, in order:
      1. transform outdata with transform_t_set();
      2. extract the input matrix and the target vector;
      3. predict with sign(dot(X, wlin));
      4. count the predictions that differ from their target and divide by
         the number of points.

    Raises ZeroDivisionError when there are no points to evaluate.
    """
    transformed = transform_t_set(outdata)
    X_matrix = input_data_matrix(transformed)
    y_vector = target_vector(transformed)

    predictions = dot(X_matrix, wlin)
    for idx, raw in enumerate(predictions):
        # scores become class predictions, overwriting the score array
        predictions[idx] = sign(raw)

    differences = predictions - y_vector
    misclassified = len([d for d in differences if d != 0])
    return misclassified / (len(differences) * 1.0)
def compute_Eout(wlin, f, N_points):
    """Misclassified out-of-sample points / total out-of-sample points.

    A new set is produced with data(N_points) and build_training_set(., f).
    wlin is then scored on it: every dot(X, wlin) entry is replaced by its
    sign() and compared with the corresponding target.
    """
    fresh_set = build_training_set(data(N_points), f)
    inputs = input_data_matrix(fresh_set)
    targets = target_vector(fresh_set)

    predicted = dot(inputs, wlin)
    for idx, score in enumerate(predicted):
        predicted[idx] = sign(score)

    # Zero where prediction and target agree, nonzero otherwise.
    gaps = predicted - targets
    wrong = sum(1 for gap in gaps if gap != 0)
    return wrong / float(len(gaps))
def compute_Eout(wlin,f,N_points):
    """Estimate the out-of-sample error of wlin against target f.

    N_points points come from data(); build_training_set() pairs them with
    f.  The returned value is the share of those points for which
    sign(dot(X, wlin)) differs from the target.
    """
    points = data(N_points)
    sample = build_training_set(points, f)
    X_sample = input_data_matrix(sample)
    y_sample = target_vector(sample)

    outputs = dot(X_sample, wlin)
    for k, value in enumerate(outputs):
        # in-place conversion of a score to its sign
        outputs[k] = sign(value)

    deltas = outputs - y_sample
    miss_count = 0
    for delta in deltas:
        if delta != 0:
            miss_count += 1
    return miss_count / (1.0 * len(deltas))
def compute_Eout_nonlineartrans(w, f, N_points):
    """Misclassified out-of-sample points / total out-of-sample points.

    A fresh set of N_points points is requested from generate_t_set() (the
    original note says these are noisy and that f is left unchanged), run
    through transform_t_set(), and used to score w with sign(dot(X, w)).
    """
    # generate_t_set also hands back a target function; it is not needed.
    fresh_set, _returned_f = generate_t_set(N_points, f)
    transformed = transform_t_set(fresh_set)
    inputs = input_data_matrix(transformed)
    targets = target_vector(transformed)

    predicted = dot(inputs, w)
    for idx, score in enumerate(predicted):
        predicted[idx] = sign(score)

    residuals = predicted - targets
    n_wrong = sum(1 for residual in residuals if residual != 0)
    return n_wrong / float(len(residuals))
def compute_Eout_nonlineartrans(w,f,N_points):
    """Out-of-sample error of w after the nonlinear transformation.

    generate_t_set(N_points, f) supplies a new set (noisy, with f
    unchanged, according to the original comment).  The set is transformed
    with transform_t_set() and the share of points where sign(dot(X, w))
    disagrees with the target is returned.
    """
    new_set, _f_again = generate_t_set(N_points, f)
    new_set_trans = transform_t_set(new_set)
    X_new = input_data_matrix(new_set_trans)
    y_new = target_vector(new_set_trans)

    labels = dot(X_new, w)
    for position, raw_value in enumerate(labels):
        # each score is overwritten by its sign
        labels[position] = sign(raw_value)

    mismatch = labels - y_new
    errors = 0
    for entry in mismatch:
        if entry != 0:
            errors += 1
    return errors / (1.0 * len(mismatch))
def run_validation(indata_train, indata_val, outdata):
    """Print the validation and out-of-sample errors for k = 3..7.

    For every k the three data sets are passed through
    transform_t_set(., k).  Weights are fitted on the transformed training
    set with linear_regression(), the validation error comes from
    compute_Eval() and the out-of-sample error from
    compute_Eout_from_data().  Nothing is returned.
    """
    ks = range(3, 8)

    # Transformed training sets, one per k.
    train_sets = {k: transform_t_set(indata_train, k) for k in ks}

    # Fit on each transformed training set; only the weights are kept.
    weights = {}
    for k in ks:
        current = train_sets[k]
        fitted, _X, _y = linear_regression(len(current), current)
        weights[k] = fitted

    # Transformed validation sets and their targets / input matrices.
    val_sets = {k: transform_t_set(indata_val, k) for k in ks}
    val_targets = {}
    val_inputs = {}
    for k in ks:
        val_targets[k] = target_vector(val_sets[k])
        val_inputs[k] = input_data_matrix(val_sets[k])

    val_errors = {
        k: compute_Eval(weights[k], val_inputs[k], val_targets[k])
        for k in ks
    }

    # Out-of-sample error on the transformed out-of-sample data.
    out_sets = {k: transform_t_set(outdata, k) for k in ks}
    out_errors = {}
    for k in ks:
        out_set = out_sets[k]
        out_errors[k] = compute_Eout_from_data(weights[k], out_set,
                                               len(out_set))

    for k in ks:
        print('Eval for k = %s is: %s' % (k, val_errors[k]))
        print('Eout for k = %s is: %s' % (k, out_errors[k]))
        print('')
def run_pla_vs_svm(nbruns=1, N=10):
    """Compare PLA with a hard-margin SVM over nbruns random experiments.

    Each run draws N points with data(N) and a random target built from
    randomline()/target_function(), redrawing until both labels +1 and -1
    are present.  PLA is trained from zero weights.  The SVM weights are
    the solution of the quadratic program handed to solvers.qp (see
    http://abel.ee.ucla.edu/cvxopt/examples/tutorial/qp.html).

    Prints the fraction of runs in which the SVM's Eout was strictly lower
    than PLA's, the same figure as a percentage, and the average number of
    support vectors per run.  Nothing is printed when nbruns yields no run.

    nbruns -- number of independent experiments.
    N      -- number of training points per experiment.
    """
    solvers.options['show_progress'] = False

    svm_vs_pla = []
    for _ in range(nbruns):
        # Redraw until the training set has points on both sides of f.
        while True:
            d = data(N)
            l = randomline()
            f = target_function(l)
            t_set = build_training_set(d, f)
            labels = target_vector(t_set)
            if (1 in labels) and (-1 in labels):
                break

        w_pla, _iteration = PLA(N, [0, 0, 0], f, t_set)
        plaEout = evaluate_diff_f_g(f, w_pla)

        X_matrix = input_data_matrix(t_set)
        dimension = len(X_matrix[0])
        # Identity matrix of size dim x dim, typecode double.
        P = spmatrix(1, range(dimension), range(dimension), tc='d')
        # Vector of zeros of size dim, typecode double.
        q = matrix([0] * dimension, tc='d')
        # One list per training point holding -y * x; transposed below
        # before being passed to the solver.
        mat = [[x * -1.0 * t[1] for x in t[0]] for t in t_set]
        G = matrix(mat, tc='d').trans()
        # Vector of -1, one entry per training point.
        h = matrix([-1] * len(t_set), tc='d')
        qp_sln = solvers.qp(P, q, G, h)
        # Converted once here instead of once per training point.
        wsvm = array(list(qp_sln['x']))

        # Support vectors: points with y * (w . x) within 0.001 of 1.
        count_sv = sum(
            1 for t in t_set
            if fabs(t[1] * dot(wsvm, array(t[0])) - 1) < 0.001)

        svmEout = computeEout_svm(f, wsvm)
        # bool() keeps the flag a plain bool whatever type the comparison
        # of the two error values returns.
        svm_vs_pla.append([bool(svmEout < plaEout), count_sv])

    n_runs = len(svm_vs_pla)
    if n_runs == 0:
        # No experiment was run, so there is nothing to average.
        return

    # Wins are divided by the number of runs (not by N, the number of
    # points per run, as the first summary line used to do).
    wins = sum(1 for result in svm_vs_pla if result[0])
    percent_svm_won = wins * 1.0 / n_runs
    print("svm win pla %f" % percent_svm_won)
    print("question 9: svm beat pla %f percent of the time" % (
        percent_svm_won * 100))
    avg_sv = sum(result[1] for result in svm_vs_pla) * 1.0 / n_runs
    print("avg sv: %s" % (avg_sv,))
def run_validation(indata_train,indata_val,outdata):
    """Report Eval and Eout for the transformations k = 3, 4, 5, 6, 7.

    Phases, each run for all k before the next one starts:
    transform the training data, fit with linear_regression(), transform
    the validation data, extract validation targets and inputs, compute
    Eval with compute_Eval(), transform the out-of-sample data, compute
    Eout with compute_Eout_from_data(), and finally print both errors per
    k followed by an empty line.
    """
    orders = range(3, 8)
    transformed_train = {}
    wlin_by_k = {}
    transformed_val = {}
    y_val_by_k = {}
    X_val_by_k = {}
    eval_by_k = {}
    transformed_out = {}
    eout_by_k = {}

    # training sets with transformation
    for k in orders:
        transformed_train[k] = transform_t_set(indata_train, k)

    # linear regression (the returned X and y are not needed)
    for k in orders:
        train_k = transformed_train[k]
        wlin_k, _X_unused, _y_unused = linear_regression(len(train_k),
                                                         train_k)
        wlin_by_k[k] = wlin_k

    # validation sets
    for k in orders:
        transformed_val[k] = transform_t_set(indata_val, k)
    for k in orders:
        val_k = transformed_val[k]
        y_val_by_k[k] = target_vector(val_k)
        X_val_by_k[k] = input_data_matrix(val_k)

    # Eval
    for k in orders:
        eval_by_k[k] = compute_Eval(wlin_by_k[k], X_val_by_k[k],
                                    y_val_by_k[k])

    # Eout
    for k in orders:
        transformed_out[k] = transform_t_set(outdata, k)
    for k in orders:
        out_k = transformed_out[k]
        eout_by_k[k] = compute_Eout_from_data(wlin_by_k[k], out_k,
                                              len(out_k))

    for k in orders:
        print('Eval for k = %s is: %s' % (k, eval_by_k[k]))
        print('Eout for k = %s is: %s' % (k, eout_by_k[k]))
        print('')
def run_pla_vs_svm(nbruns=1, N=10):
    """Run nbruns PLA-versus-SVM experiments on N points and print a summary.

    Per run:
      * draw data(N) and a random target (randomline() + target_function())
        until the labels contain both +1 and -1;
      * train PLA from w = [0, 0, 0] and measure it with
        evaluate_diff_f_g();
      * solve the SVM quadratic program with solvers.qp
        (http://abel.ee.ucla.edu/cvxopt/examples/tutorial/qp.html) and
        measure it with computeEout_svm();
      * count the support vectors, i.e. points with |y * (w . x) - 1|
        below 0.001.

    The summary gives the share of runs won by the SVM (strictly lower
    Eout), that share as a percentage, and the mean support-vector count.
    With no runs (nbruns <= 0) nothing is printed.
    """
    solvers.options['show_progress'] = False

    outcomes = []  # one [svm_won, support_vector_count] entry per run
    for _run in range(nbruns):
        onBothSides = False
        while not onBothSides:
            d = data(N)
            l = randomline()
            f = target_function(l)
            t_set = build_training_set(d, f)
            y = target_vector(t_set)
            onBothSides = (1 in y) and (-1 in y)

        w = [0, 0, 0]
        w_pla, _iteration = PLA(N, w, f, t_set)
        plaEout = evaluate_diff_f_g(f, w_pla)

        X_matrix = input_data_matrix(t_set)
        dimension = len(X_matrix[0])
        # identity matrix of size dim x dim, typecode double
        P = spmatrix(1, range(dimension), range(dimension), tc='d')
        # vector of zeros of size dim, typecode double
        q = matrix([0] * dimension, tc='d')

        # -y * x for every training point, transposed before solving
        mat = []
        for point in t_set:
            target = point[1]
            mat.append([x * -1.0 * target for x in point[0]])
        G = matrix(mat, tc='d').trans()
        # vector of -1, one entry per training point
        h = matrix([-1] * len(t_set), tc='d')

        qp_sln = solvers.qp(P, q, G, h)
        # Built once per run; the solution does not change while counting.
        wsvm = array(list(qp_sln['x']))

        count_sv = 0
        for point in t_set:
            margin = point[1] * dot(wsvm, array(point[0]))
            if fabs(margin - 1) < 0.001:
                count_sv += 1

        svmEout = computeEout_svm(f, wsvm)
        # Stored as a plain bool so the tally below does not depend on the
        # type returned by the comparison.
        outcomes.append([bool(svmEout < plaEout), count_sv])

    total_runs = len(outcomes)
    if total_runs == 0:
        # Avoid dividing by zero when no experiment was requested.
        return

    svm_wins = len([entry for entry in outcomes if entry[0]])
    # The denominator is the number of runs; dividing by N (points per
    # run) gave a wrong ratio whenever nbruns != N.
    percent_svm_won = svm_wins * 1.0 / total_runs
    print("svm win pla %f" % percent_svm_won)
    print("question 9: svm beat pla %f percent of the time" % (
        percent_svm_won * 100))
    avg_sv = sum([entry[1] for entry in outcomes]) * 1.0 / total_runs
    print("avg sv: %s" % (avg_sv,))