Example #1
0
def run_linear_regression(N_samples,N_points):
    '''Repeat a linear regression experiment and report the mean errors.

    Each of the N_samples runs draws points with data(N_points), builds a
    target function from randomline(), labels the points with it and fits
    linear_regression on the resulting training set.  The in-sample error
    (compute_Ein) and out-of-sample error (compute_Eout) of the fitted
    weights are collected per run and handed to print_avg under the labels
    'Ein' and 'Eout'.
    '''
    print('running Linear Regression on %s samples' % str(N_samples))
    print('Each sample has %s data points' % str(N_points))

    in_sample_errors = []
    out_of_sample_errors = []

    for _ in range(N_samples):
        # Draw the points before the line so the helpers are invoked in the
        # same order on every run.
        points = data(N_points)
        target = target_function(randomline())
        training_set = build_training_set(points, target)

        weights, X, y = linear_regression(N_points, training_set)

        in_sample_errors.append(compute_Ein(weights, X, y))
        out_of_sample_errors.append(compute_Eout(weights, target, N_points))

    print_avg('Ein', in_sample_errors)
    print_avg('Eout', out_of_sample_errors)
Example #2
0
def run_linear_regression(N_samples, N_points):
    '''Average Ein and Eout of linear regression over N_samples experiments.

    Every experiment generates N_points-sized input via data(), a target
    function from a randomline(), and a training set labelled by that
    target.  linear_regression is fitted on the training set; compute_Ein
    and compute_Eout are evaluated on the fitted weights and the per-run
    values are passed to print_avg ('Ein' and 'Eout').
    '''
    print('running Linear Regression on %s samples' % str(N_samples))
    print('Each sample has %s data points' % str(N_points))

    ein_per_run, eout_per_run = [], []

    run = 0
    while run < N_samples:
        run += 1

        dataset = data(N_points)
        line = randomline()
        target_fn = target_function(line)
        train = build_training_set(dataset, target_fn)

        wlin, X, y = linear_regression(N_points, train)

        # Record both error measures for this experiment.
        ein_per_run.append(compute_Ein(wlin, X, y))
        eout_per_run.append(compute_Eout(wlin, target_fn, N_points))

    print_avg('Ein', ein_per_run)
    print_avg('Eout', eout_per_run)
Example #3
0
def compute_Eout(wlin, f, N_points):
    """Estimate the out-of-sample classification error of the weights wlin.

    A fresh set is generated with data(N_points) and labelled with f.  The
    sign of dot(X, wlin) is taken as the prediction for each row and
    compared with the label.

    Returns the number of disagreeing points divided by the total number of
    points, as a float.
    """
    fresh_points = data(N_points)
    test_set = build_training_set(fresh_points, f)

    X_matrix = input_data_matrix(test_set)
    y_vector = target_vector(test_set)

    # Turn the raw linear scores into class predictions, in place.
    predictions = dot(X_matrix, wlin)
    for idx, score in enumerate(predictions):
        predictions[idx] = sign(score)

    # A non-zero difference marks a point where prediction and label differ.
    mismatches = predictions - y_vector
    n_wrong = sum(1 for delta in mismatches if delta != 0)
    return n_wrong / (len(mismatches) * 1.0)
Example #4
0
def compute_Eout(wlin,f,N_points):
    """Fraction of freshly generated points that wlin classifies wrongly.

    Generates data(N_points), labels it with f through build_training_set,
    and compares sign(dot(X, wlin)) with the labels row by row.

    Returns (number of mismatching points) / (number of points) as a float.
    """
    new_points = data(N_points)
    evaluation_set = build_training_set(new_points, f)

    inputs = input_data_matrix(evaluation_set)
    labels = target_vector(evaluation_set)

    hypothesis = dot(inputs, wlin)
    # Replace every score by its sign so it is comparable with the labels.
    for pos, raw in enumerate(hypothesis):
        hypothesis[pos] = sign(raw)

    errors = hypothesis - labels
    wrong = len([e for e in errors if e != 0])
    return float(wrong) / len(errors)
Example #5
0
def run_lr_and_pla(N_samples, N_points):
    '''Seed PLA with linear regression weights and report its iterations.

    For each of the N_samples runs a training set of N_points points is
    built, linear_regression is fitted on it, and the resulting weight
    vector is passed to PLA as its initial weights.  The iteration count
    returned by PLA for every run is collected and given to print_avg under
    the label 'Number of iterations'.
    '''
    print('running Linear Regression on %s samples' % N_samples)
    print('Each samples has %s data points' % N_points)

    pla_iterations = []
    for _ in range(N_samples):
        points = data(N_points)
        target = target_function(randomline())
        training_set = build_training_set(points, target)

        # Only the weights are used here; X and y are returned but ignored.
        wlin, _X, _y = linear_regression(N_points, training_set)

        # The final PLA weights are not needed, only how long it took.
        _w_pla, n_iterations = PLA(N_points, wlin, target, training_set)
        pla_iterations.append(n_iterations)

    print_avg('Number of iterations', pla_iterations)
Example #6
0
def run_lr_and_pla(N_samples, N_points):
    '''Run linear regression, then PLA started from the regression weights.

    Repeats the experiment N_samples times on training sets built from
    data(N_points) and a target derived from randomline().  The number of
    iterations PLA reports for each experiment is accumulated and passed to
    print_avg ('Number of iterations').
    '''
    print('running Linear Regression on %s samples' % N_samples)
    print('Each samples has %s data points' % N_points)

    iteration_counts = []

    experiment = 0
    while experiment < N_samples:
        experiment += 1

        dataset = data(N_points)
        line = randomline()
        target_fn = target_function(line)
        train = build_training_set(dataset, target_fn)

        # linear_regression returns (weights, X, y); PLA only needs the weights.
        regression_weights, _inputs, _labels = linear_regression(N_points, train)

        _final_weights, steps = PLA(N_points, regression_weights, target_fn, train)
        iteration_counts.append(steps)

    print_avg('Number of iterations', iteration_counts)
Example #7
0
def run_PLA(N_samples, N_points):
    """Run PLA on N_samples random problems and print summary statistics.

    Each run builds a training set of N_points points labelled by a target
    derived from randomline(), starts PLA from the all-zero weight vector,
    and records:

    * the iteration count returned by PLA,
    * whether the returned weights classify every training point as its
      label (via h(w, point)),
    * the value of evaluate_diff_f_g(f, w).

    Prints the number of runs with and without a misclassified training
    point, the average iteration count and the average of the
    evaluate_diff_f_g values.  With N_samples == 0 the averages are
    undefined and are printed as nan.  Returns None.
    """
    samples = []  # one entry per run: 1 = all points classified, 0 = not
    iterations = []  # iterations PLA needed in each run
    diff = []  # evaluate_diff_f_g(f, w) for each run

    for _ in range(N_samples):
        d = data(N_points)
        l = randomline()
        f = target_function(l)
        t_set = build_training_set(d, f)

        # Start the PLA with the weight vector w being all zeros.
        w, iteration = PLA(N_points, [0, 0, 0], f, t_set)
        iterations.append(iteration)

        # Each training entry is indexed as (point, label); stop at the
        # first point whose label differs from the hypothesis output.
        misclassified = any(t[1] != h(w, t[0]) for t in t_set)
        samples.append(0 if misclassified else 1)

        # Difference between the target f and the learned hypothesis.
        diff.append(evaluate_diff_f_g(f, w))

    n_runs = len(iterations)
    if n_runs:
        # Convert to float BEFORE dividing: on Python 2, int / int truncates,
        # which silently floored the reported iteration average.
        avg_iterations = sum(iterations) / float(n_runs)
        avg_diff = sum(diff) / float(n_runs)
    else:
        # No run was performed, so there is nothing to average.
        avg_iterations = avg_diff = float('nan')

    print('number of samples misclassified: %s ' % samples.count(0))
    print('number of classified samples: %s ' % samples.count(1))
    print('number of iteration avg: %s ' % avg_iterations)
    print('average of difference in function g: %s' % avg_diff)
Example #8
0
def run_PLA(N_samples,N_points):
    """Run PLA on N_samples random problems and print summary statistics.

    Each run builds a training set of N_points points labelled by a target
    derived from randomline(), starts PLA from the weight vector [0, 0, 0],
    and records the iteration count returned by PLA, whether every training
    point is classified as its label by h(w, point), and the value of
    evaluate_diff_f_g(f, w).

    Prints the number of runs with and without a misclassified training
    point, the average iteration count and the average of the
    evaluate_diff_f_g values.  When N_samples is 0 the two averages are
    printed as nan instead of raising ZeroDivisionError.  Returns None.
    """
    samples = []  # vector of 1 classified, 0 misclassified (one per run)
    iterations = []  # vector of iterations needed for each PLA
    diff = []  # vector of evaluate_diff_f_g results (f versus g)

    for _ in range(N_samples):
        # run PLA in sample
        d = data(N_points)
        l = randomline()
        f = target_function(l)
        t_set = build_training_set(d, f)

        w, iteration = PLA(N_points, [0, 0, 0], f, t_set)
        iterations.append(iteration)

        # check if points are classified or not; any() stops at the first
        # training entry whose label (index 1) disagrees with h on its
        # point (index 0).
        has_error = any(entry[1] != h(w, entry[0]) for entry in t_set)
        if has_error:
            samples.append(0)
        else:
            samples.append(1)

        # check difference between f and g
        diff.append(evaluate_diff_f_g(f, w))

    total_runs = len(iterations)
    if total_runs == 0:
        # Nothing was run; the averages are undefined.
        mean_iterations = float('nan')
        mean_diff = float('nan')
    else:
        # float() is applied to the divisor so the quotient is not truncated
        # by Python 2 integer division (the old code multiplied by 1.0 only
        # after the integer division had already happened).
        mean_iterations = sum(iterations) / float(total_runs)
        mean_diff = sum(diff) / float(total_runs)

    print('number of samples misclassified: %s ' % samples.count(0))
    print('number of classified samples: %s ' % samples.count(1))
    print('number of iteration avg: %s ' % mean_iterations)
    print('average of difference in function g: %s' % mean_diff)
Example #9
0
def run_pla_vs_svm(nbruns=1, N=10):
    """Compare PLA with a QP-based SVM over nbruns random problems.

    Parameters:
        nbruns: number of independent runs.
        N: number of training points generated in each run.

    In every run a training set containing both labels (+1 and -1) is
    drawn, PLA is trained from zero weights, and an SVM weight vector is
    obtained from cvxopt's solvers.qp.  The run records whether
    computeEout_svm(f, wsvm) is lower than evaluate_diff_f_g(f, w_pla), and
    how many training points satisfy |y * (w . x) - 1| < 0.001 (counted as
    support vectors).

    Prints the fraction and the percentage of runs won by the SVM and the
    average support-vector count.  With nbruns == 0 these values are
    printed as nan.  Returns None.
    """
    solvers.options['show_progress'] = False

    svm_vs_pla = []  # one [svm_won, support_vector_count] entry per run
    for _ in range(nbruns):
        # Redraw until the training set has points on both sides of the line.
        while True:
            d = data(N)
            l = randomline()
            f = target_function(l)
            t_set = build_training_set(d, f)
            labels = target_vector(t_set)
            if (1 in labels) and (-1 in labels):
                break

        w_pla, iteration = PLA(N, [0, 0, 0], f, t_set)
        plaEout = evaluate_diff_f_g(f, w_pla)

        X_matrix = input_data_matrix(t_set)
        dimension = len(X_matrix[0])

        # Quadratic term: identity matrix of size dimension x dimension.
        # NOTE(review): the full identity penalises every component of w,
        # including a bias weight if the points carry a constant coordinate
        # - confirm this is the intended SVM formulation.
        P = spmatrix(1, range(dimension), range(dimension), tc='d')
        # Linear term: vector of zeros of size dimension.
        q = matrix([0] * dimension, tc='d')

        # Constraints -y * (w . x) <= -1, one row per training point.  The
        # nested list is transposed after construction to get that layout.
        G = matrix([[x * -1.0 * t[1] for x in t[0]] for t in t_set], tc='d')
        G = G.trans()
        h = matrix([-1] * len(t_set), tc='d')

        # http://abel.ee.ucla.edu/cvxopt/examples/tutorial/qp.html
        qp_sln = solvers.qp(P, q, G, h)
        # Converted once here instead of on every pass of the loop below.
        wsvm = array(list(qp_sln['x']))

        # Points lying (numerically) on the margin count as support vectors.
        count_sv = 0
        for t in t_set:
            if fabs(t[1] * dot(wsvm, array(t[0])) - 1) < 0.001:
                count_sv += 1

        svmEout = computeEout_svm(f, wsvm)
        svm_vs_pla.append([bool(svmEout < plaEout), count_sv])

    n_runs = len(svm_vs_pla)
    if n_runs:
        # The win ratio is per RUN; the previous code divided by N (points
        # per run), which gave a wrong value whenever N != nbruns.
        svm_wins = sum(1 for r in svm_vs_pla if r[0])
        win_ratio = svm_wins / float(n_runs)
        avg_sv = sum(r[1] for r in svm_vs_pla) / float(n_runs)
    else:
        # No run was performed, so the ratios are undefined.
        win_ratio = avg_sv = float('nan')

    print("svm win pla %f" % win_ratio)
    print("question 9: svm beat pla %f percent of the time" % (win_ratio * 100))
    print("avg sv: %s" % avg_sv)
Example #10
0
def run_pla_vs_svm(nbruns = 1, N = 10):
    """Compare PLA with a QP-based SVM over nbruns random problems.

    Parameters:
        nbruns: number of independent runs.
        N: number of training points generated in each run.

    Every run draws a training set that contains both labels (+1 and -1),
    trains PLA starting from [0, 0, 0] and solves a quadratic program with
    cvxopt's solvers.qp to obtain the SVM weights.  It then records whether
    computeEout_svm(f, wsvm) is below evaluate_diff_f_g(f, w_pla) and how
    many training points satisfy |y * (w . x) - 1| < 0.001 (counted as
    support vectors).

    Prints the fraction and percentage of runs in which the SVM won, and
    the average support-vector count; all three are nan when nbruns is 0.
    Returns None.
    """
    solvers.options['show_progress'] = False

    outcomes = []  # one [svm_won, support_vector_count] entry per run
    for _ in range(nbruns):
        # Keep sampling until both classes are present in the training set.
        onBothSides = False
        while not onBothSides:
            d = data(N)
            l = randomline()
            f = target_function(l)
            t_set = build_training_set(d, f)
            y_all = target_vector(t_set)
            onBothSides = (1 in y_all) and (-1 in y_all)

        w_pla, iteration = PLA(N, [0, 0, 0], f, t_set)
        plaEout = evaluate_diff_f_g(f, w_pla)

        X_matrix = input_data_matrix(t_set)
        dimension = len(X_matrix[0])

        # identity matrix of size dim x dim, typecode double
        # NOTE(review): this penalises all components of w alike, including
        # a bias weight if one is present - confirm that is intended.
        P = spmatrix(1, range(dimension), range(dimension), tc='d')
        # vector of zeros of size dim, typecode double
        q = matrix([0] * dimension, tc='d')

        # One constraint per training point: -y * (w . x) <= -1.
        rows = []
        for t in t_set:
            rows.append([x * -1.0 * t[1] for x in t[0]])
        G = matrix(rows, tc='d').trans()
        # vector of -1 of size t_set
        h = matrix([-1] * len(t_set), tc='d')

        # http://abel.ee.ucla.edu/cvxopt/examples/tutorial/qp.html
        qp_sln = solvers.qp(P, q, G, h)
        # Build the numpy array once rather than inside the counting loop.
        wsvm = array(list(qp_sln['x']))

        # number of support vectors in this run (points on the margin)
        count_sv = sum(
            1 for t in t_set
            if fabs(t[1] * dot(wsvm, array(t[0])) - 1) < 0.001)

        # Eout of svm
        svmEout = computeEout_svm(f, wsvm)
        outcomes.append([bool(svmEout < plaEout), count_sv])

    total_runs = len(outcomes)
    if total_runs == 0:
        # Nothing was simulated; report undefined statistics as nan.
        percent_svm_won = float('nan')
        avg_sv = float('nan')
    else:
        # Normalise by the number of runs.  The old "svm win pla" line
        # divided by N, the number of points per run, which is unrelated to
        # how many runs were performed.
        wins = len([r for r in outcomes if r[0]])
        percent_svm_won = wins * 1.0 / total_runs
        avg_sv = sum([r[1] for r in outcomes]) * 1.0 / total_runs

    print("svm win pla %f" % percent_svm_won)
    print("question 9: svm beat pla %f percent of the time" % (percent_svm_won * 100))
    print("avg sv: %s" % avg_sv)