Beispiel #1
0
def run_reg_linear_reg_one_vs_all(dTrain,dTest):
    '''Print "digit vs all" linear regression errors for the digits 0 to 9.

    dTrain -- training data handed to getDataOneVsAll
    dTest  -- test data handed to getDataOneVsAll

    Every row returned by getDataOneVsAll is turned into the pair
    [[1, row[1], row[2]], row[0]]. For each digit the weights fitted on the
    training pairs are scored with compute_Ein on the training pairs (printed
    as Ein) and on the test pairs (printed as Eout). The fit is then repeated
    on the output of transform_t_set and only Eout is printed.
    Returns nothing.
    '''
    # NOTE(review): lda is assigned but never passed to linear_regression here.
    lda = 1.0
    for digit in range(0,10):
        # in sample
        train_set = [[[1,row[1],row[2]],row[0]]
                     for row in getDataOneVsAll(dTrain,digit)]
        # out of sample
        test_set = [[[1,row[1],row[2]],row[0]]
                    for row in getDataOneVsAll(dTest,digit)]

        # no transform: weights come from the training set only
        w_plain,X_in,y_in = linear_regression(len(train_set),train_set)
        print('For %s vs all Ein = %s'%(digit,compute_Ein(w_plain,X_in,y_in)))
        # the fit on the test set is only used for its 2nd and 3rd return
        # values; the weights it produces are discarded
        _,X_out,y_out = linear_regression(len(test_set),test_set)
        print('For %s vs all Eout = %s'%(digit,compute_Ein(w_plain,X_out,y_out)))

        # with transform
        train_trans = transform_t_set(train_set)
        w_trans,_,_ = linear_regression(len(train_trans),train_trans)
        test_trans = transform_t_set(test_set)
        _,X_out_trans,y_out_trans = linear_regression(len(test_trans),test_trans)
        print('For %s vs all with transformation Eout = %s'%(digit,compute_Ein(w_trans,X_out_trans,y_out_trans)))
Beispiel #2
0
def run_reg_linear_reg_one_vs_all(dTrain, dTest):
    '''Run linear regression "i vs all" for i in 0..9 and print the errors.

    The rows returned by getDataOneVsAll(dTrain, i) and
    getDataOneVsAll(dTest, i) are converted to [[1, d[1], d[2]], d[0]]
    entries. Weights fitted on the training entries are scored with
    compute_Ein on the training entries (printed as Ein) and on the test
    entries (printed as Eout). After transform_t_set the fit is repeated and
    only Eout is printed. Nothing is returned.
    '''

    def as_t_set(rows):
        # Pair the input vector [1, d[1], d[2]] with d[0].
        return [[[1, d[1], d[2]], d[0]] for d in rows]

    # NOTE(review): lda is not used anywhere in this function.
    lda = 1.0
    for i in range(0, 10):
        in_sample = as_t_set(getDataOneVsAll(dTrain, i))
        out_sample = as_t_set(getDataOneVsAll(dTest, i))

        # no transform
        w_lin, X_in, y_in = linear_regression(len(in_sample), in_sample)
        ein = compute_Ein(w_lin, X_in, y_in)
        print('For %s vs all Ein = %s' % (i, ein))
        # only the 2nd and 3rd return values of the test-set fit are used
        _, X_out, y_out = linear_regression(len(out_sample), out_sample)
        eout = compute_Ein(w_lin, X_out, y_out)
        print('For %s vs all Eout = %s' % (i, eout))

        # with transform
        in_sample_trans = transform_t_set(in_sample)
        w_trans, _, _ = linear_regression(len(in_sample_trans),
                                          in_sample_trans)
        out_sample_trans = transform_t_set(out_sample)
        _, X_out_t, y_out_t = linear_regression(len(out_sample_trans),
                                                out_sample_trans)
        eout_trans = compute_Ein(w_trans, X_out_t, y_out_t)
        print('For %s vs all with transformation Eout = %s' % (i, eout_trans))
Beispiel #3
0
def run_linear_regression(N_samples, N_points):
    '''Average Ein and Eout of linear regression over N_samples runs.

    Each run calls data(N_points) and randomline(), wraps the line with
    target_function, builds a training set from both and fits
    linear_regression on it. compute_Ein and compute_Eout are recorded per
    run and their averages are reported through print_avg.
    '''
    print('running Linear Regression on %s samples' % str(N_samples))
    print('Each sample has %s data points' % str(N_points))

    in_sample_errors = []
    out_sample_errors = []

    for _ in range(N_samples):
        # data() is called before randomline(), as any shared random state
        # would make the order matter
        points = data(N_points)
        target = target_function(randomline())
        training_set = build_training_set(points, target)

        weights, X, y = linear_regression(N_points, training_set)

        in_sample_errors.append(compute_Ein(weights, X, y))
        out_sample_errors.append(compute_Eout(weights, target, N_points))

    print_avg('Ein', in_sample_errors)
    print_avg('Eout', out_sample_errors)
Beispiel #4
0
def run_linear_regression(N_samples,N_points):
    '''Run linear regression N_samples times on N_points points each.

    Collects compute_Ein and compute_Eout for every run and hands both
    lists to print_avg under the labels 'Ein' and 'Eout'.
    '''
    print('running Linear Regression on %s samples' %str(N_samples))
    print('Each sample has %s data points' %str(N_points))

    errors_in,errors_out = [],[]

    run = 0
    while run < N_samples:
        sample = data(N_points)
        line = randomline()
        target = target_function(line)
        training_set = build_training_set(sample,target)

        w,X,y = linear_regression(N_points,training_set)

        errors_in.append(compute_Ein(w,X,y))
        errors_out.append(compute_Eout(w,target,N_points))
        run += 1

    print_avg('Ein',errors_in)
    print_avg('Eout',errors_out)
Beispiel #5
0
def evaluate_error(X_train, y_train, X_val, y_val, X_test, y_test):
    """Compare validation and test error for models using k + 1 columns.

    For every k in 3..7 a model is fitted with tools.linear_regression on
    X_train[:, :k + 1] (no regularization argument is passed), and
    tools.cal_error is evaluated on the predictions for the validation and
    test inputs restricted to the same columns. Both error lists are printed,
    followed by the k with the smallest validation error and the smallest
    test error together with its k.
    """
    ks = [3, 4, 5, 6, 7]
    E_val = []
    E_test = []
    for k in ks:
        n_cols = k + 1

        # Fit on the training data restricted to the first k + 1 columns
        weights = tools.linear_regression(X_train[:, :n_cols], y_train)

        # Validation error
        val_pred = tools.predict(X_val[:, :n_cols], weights)
        E_val.append(tools.cal_error(y_val, val_pred))

        # Test error
        test_pred = tools.predict(X_test[:, :n_cols], weights)
        E_test.append(tools.cal_error(y_test, test_pred))

    print(E_val)
    print(E_test)

    # np.argmin gives the position in the error list; ks maps it back to k
    best_val = np.argmin(E_val)
    print('Smallest error on validation set is achieved when k = {}'.format(
        ks[best_val]))
    best_test = np.argmin(E_test)
    print('Smallest error on test set is {}, achieved when k = {}'.format(
        E_test[best_test], ks[best_test]))
Beispiel #6
0
def run_reg_linear_reg_one_vs_one(dTrain, dTest):
    '''Print "1 vs 5" linear regression errors for lambda 0.01 and lambda 1.

    dTrain -- training data handed to getDataOneVsOne(dTrain, 1, 5)
    dTest  -- test data handed to getDataOneVsOne(dTest, 1, 5)

    Every returned row becomes the pair [[1, d[1], d[2]], d[0]]. For each
    lambda (passed as third argument of linear_regression) the weights fitted
    on the training pairs are scored with compute_Ein on the training and on
    the test pairs, first on the plain pairs and then on the output of
    transform_t_set. Returns nothing.
    '''
    lambdas = (0.01, 1)

    # 1 vs 5
    # in sample
    train_set = [[[1, row[1], row[2]], row[0]]
                 for row in getDataOneVsOne(dTrain, 1, 5)]

    # out of sample: one independently built copy per lambda, filled in a
    # single pass over the test rows
    test_sets = [[] for _ in lambdas]
    for row in getDataOneVsOne(dTest, 1, 5):
        for test_set in test_sets:
            test_set.append([[1, row[1], row[2]], row[0]])

    for lda, test_set in zip(lambdas, test_sets):
        print('--------------------------------------------------')
        print('lambda is: %s' % (lda))

        # no transform
        w_plain, X_in, y_in = linear_regression(len(train_set), train_set, lda)
        print('For 1 vs 5 Ein = %s' % (compute_Ein(w_plain, X_in, y_in)))
        # only the 2nd and 3rd return values of the test-set fit are used
        _, X_out, y_out = linear_regression(len(test_set), test_set, lda)
        print('For 1 vs 5 Eout = %s' % (compute_Ein(w_plain, X_out, y_out)))

        # with transform
        train_trans = transform_t_set(train_set)
        w_trans, X_in_t, y_in_t = linear_regression(len(train_trans),
                                                    train_trans, lda)
        test_trans = transform_t_set(test_set)
        _, X_out_t, y_out_t = linear_regression(len(test_trans), test_trans,
                                                lda)
        print('For 1 vs 5 with transformation Ein = %s' % (compute_Ein(
            w_trans, X_in_t, y_in_t)))
        print('For 1 vs 5 with transformation Eout = %s' % (compute_Ein(
            w_trans, X_out_t, y_out_t)))
Beispiel #7
0
def run_reg_linear_reg_one_vs_one(dTrain,dTest):
    '''Run the "1 vs 5" linear regression twice, with lambda 0.01 and 1.

    Rows from getDataOneVsOne are converted to [[1, d[1], d[2]], d[0]]
    pairs. For each lambda four errors are printed: Ein and Eout on the
    plain pairs, then Ein and Eout on the pairs after transform_t_set.
    All errors come from compute_Ein with weights fitted on the training
    pairs. Nothing is returned.
    '''

    def report(lda,train,test):
        # Print the four errors for one lambda value.
        print('--------------------------------------------------')
        print('lambda is: %s'%(lda))
        # in sample with no transform
        w,X_in,y_in = linear_regression(len(train),train,lda)
        print('For 1 vs 5 Ein = %s'%(compute_Ein(w,X_in,y_in)))
        # out of sample with no transform (weights of this fit are unused)
        _,X_out,y_out = linear_regression(len(test),test,lda)
        print('For 1 vs 5 Eout = %s'%(compute_Ein(w,X_out,y_out)))
        # in sample with transform
        train_t = transform_t_set(train)
        w_t,X_in_t,y_in_t = linear_regression(len(train_t),train_t,lda)
        # out of sample with transform
        test_t = transform_t_set(test)
        _,X_out_t,y_out_t = linear_regression(len(test_t),test_t,lda)
        print('For 1 vs 5 with transformation Ein = %s'%(compute_Ein(w_t,X_in_t,y_in_t)))
        print('For 1 vs 5 with transformation Eout = %s'%(compute_Ein(w_t,X_out_t,y_out_t)))

    lda1 = 0.01
    lda2 = 1
    # 1 vs 5, in sample
    train = [[[1,d[1],d[2]],d[0]] for d in getDataOneVsOne(dTrain,1,5)]
    # out of sample; the second run gets its own copy of the input lists
    test_a = [[[1,d[1],d[2]],d[0]] for d in getDataOneVsOne(dTest,1,5)]
    test_b = [[inputs[:],target] for inputs,target in test_a]

    report(lda1,train,test_a)
    report(lda2,train,test_b)
Beispiel #8
0
def run_nonlineartransformation(indata, outdata):
    '''Report classification errors of linear regression on transformed data,
    without and with weight decay.

    indata  -- training set; passed to transform_t_set, and len(indata) is
               used as the number of points for linear_regression
    outdata -- out-of-sample data handed to compute_Eout_nonlineartrans

    Sections printed:
      -2-  plain linear regression on the transformed training set
      -3-  weight decay with k = -3
      -4-  weight decay with k = 3
      -5-  weight decay for k in [2, 1, 0, -1, -2]
      -6-  search over k in range(-200, 200) for the lowest out-of-sample
           error; prints the best k and its error

    k is the last argument of compute_weight_decay; the printed text
    describes it as "lambda = 10k" (presumably lambda = 10**k -- confirm
    against compute_weight_decay). Returns nothing.
    '''
    N_points = len(indata)

    t_set_trans = transform_t_set(indata)
    wtrans, Xtrans, ytrans = linear_regression(N_points, t_set_trans)
    print('-2-')
    print('Linear regression on training set after non linear transformation:')
    Eintrans = compute_Ein(wtrans, Xtrans, ytrans)
    Eouttrans = compute_Eout_nonlineartrans(wtrans, outdata)
    print('in sample classification error: %s' % (Eintrans))
    print('out of sample classification error: %s' % (Eouttrans))
    print('-3-')
    print('Adding weight decay to linear regression with lambda = 10k and k = -3')
    w_decay = compute_weight_decay(wtrans, t_set_trans, Xtrans, ytrans, -3)
    Eintrans_decay = compute_Ein(w_decay, Xtrans, ytrans)
    Eouttrans_decay = compute_Eout_nonlineartrans(w_decay, outdata)
    print('in sample classification error:%s' % (Eintrans_decay))
    print('out of sample classification error: %s' % (Eouttrans_decay))
    print('-4-')
    print('Using now k = 3')
    w_decay = compute_weight_decay(wtrans, t_set_trans, Xtrans, ytrans, 3)
    Eintrans_decay = compute_Ein(w_decay, Xtrans, ytrans)
    Eouttrans_decay = compute_Eout_nonlineartrans(w_decay, outdata)
    print('in sample classification error: %s' % (Eintrans_decay))
    print('out of sample classification error: %s' % (Eouttrans_decay))
    print('-5-')
    Ks = [2, 1, 0, -1, -2]
    print('searching the lowest out of sample classification error for the following k values.')
    print('k in (%s)' % (str(Ks)))
    for k in Ks:
        w_decay = compute_weight_decay(wtrans, t_set_trans, Xtrans, ytrans, k)
        Eintrans_decay = compute_Ein(w_decay, Xtrans, ytrans)
        Eouttrans_decay = compute_Eout_nonlineartrans(w_decay, outdata)
        print('K : %s' % (k))
        print('in sample classification error: %s' % (Eintrans_decay))
        print('out of sample classification error: %s' % (Eouttrans_decay))
    print('-6-')
    print('searching the minimum out of sample classification error by varying k in the integer values.')
    # 999 acts as "nothing found yet" for both the best k and its error
    mink = 999
    minEout = 999
    for k in range(-200, 200):
        w_decay = compute_weight_decay(wtrans, t_set_trans, Xtrans, ytrans, k)
        Eout_decay = compute_Eout_nonlineartrans(w_decay, outdata)
        # strict '<' keeps the first k that reaches the minimum
        if Eout_decay < minEout:
            minEout = Eout_decay
            mink = k
    # Report the best k, not the loop variable (which ends at 199)
    print('K: %s' % (mink))
    print('out of sample classification error: %s' % (minEout))
Beispiel #9
0
def run_nonlineartransformation(indata,outdata):
    '''Print classification errors of linear regression on the transformed
    training set, with and without weight decay.

    indata  -- training set; transformed with transform_t_set, its length is
               the number of points passed to linear_regression
    outdata -- out-of-sample data handed to compute_Eout_nonlineartrans

    Output sections: -2- plain regression, -3- weight decay with k = -3,
    -4- weight decay with k = 3, -5- weight decay for k in [2,1,0,-1,-2],
    -6- the k in range(-200,200) with the lowest out-of-sample error.

    k is forwarded as the last argument of compute_weight_decay (the printed
    text calls it "lambda = 10k"; presumably lambda = 10**k -- confirm).
    Returns nothing.
    '''
    N_points = len(indata)

    t_set_trans = transform_t_set(indata)
    wtrans,Xtrans,ytrans = linear_regression(N_points,t_set_trans)
    print('-2-')
    print('Linear regression on training set after non linear transformation:')
    Eintrans = compute_Ein(wtrans,Xtrans,ytrans)
    Eouttrans = compute_Eout_nonlineartrans(wtrans,outdata)
    print('in sample classification error: %s'%(Eintrans))
    print('out of sample classification error: %s'%(Eouttrans))
    print('-3-')
    print('Adding weight decay to linear regression with lambda = 10k and k = -3')
    w_decay = compute_weight_decay(wtrans,t_set_trans,Xtrans,ytrans,-3)
    Eintrans_decay = compute_Ein(w_decay,Xtrans,ytrans)
    Eouttrans_decay = compute_Eout_nonlineartrans(w_decay,outdata)
    print('in sample classification error:%s'%(Eintrans_decay))
    print('out of sample classification error: %s'%(Eouttrans_decay))
    print('-4-')
    print('Using now k = 3')
    w_decay = compute_weight_decay(wtrans,t_set_trans,Xtrans,ytrans,3)
    Eintrans_decay = compute_Ein(w_decay,Xtrans,ytrans)
    Eouttrans_decay = compute_Eout_nonlineartrans(w_decay,outdata)
    print('in sample classification error: %s'%(Eintrans_decay))
    print('out of sample classification error: %s'%(Eouttrans_decay))
    print('-5-')
    Ks = [2,1,0,-1,-2]
    print('searching the lowest out of sample classification error for the following k values.')
    print('k in (%s)'%(str(Ks)))
    for k in Ks:
        w_decay = compute_weight_decay(wtrans,t_set_trans,Xtrans,ytrans,k)
        Eintrans_decay = compute_Ein(w_decay,Xtrans,ytrans)
        Eouttrans_decay = compute_Eout_nonlineartrans(w_decay,outdata)
        print('K : %s'%(k))
        print('in sample classification error: %s'%(Eintrans_decay))
        print('out of sample classification error: %s'%(Eouttrans_decay))
    print('-6-')
    print('searching the minimum out of sample classification error by varying k in the integer values.')
    # 999 marks "no candidate yet" for both the best k and its error
    mink = 999
    minEout = 999
    for k in range(-200,200):
        w_decay = compute_weight_decay(wtrans,t_set_trans,Xtrans,ytrans,k)
        Eout_decay = compute_Eout_nonlineartrans(w_decay,outdata)
        # strict comparison: the first k reaching the minimum is kept
        if Eout_decay < minEout:
            minEout = Eout_decay
            mink = k
    # print the k that achieved minEout; the loop variable k is 199 here
    print('K: %s'%(mink))
    print('out of sample classification error: %s'%(minEout))
Beispiel #10
0
def hsvt_ols(X1, X2, y1, t=0.99, rcond=1e-15, include_pre=True):
    """De-noise X1 and X2 with hsvt, regress y1 on X1 and predict for X2.

    X1, X2      -- matrices passed to approximate_rank and hsvt; each is
                   de-noised with its own rank
    y1          -- regression target for the de-noised X1
    t           -- forwarded to approximate_rank
    rcond       -- forwarded to linear_regression
    include_pre -- when true, the fitted values for X1 are concatenated in
                   front of the predictions for X2

    Prints the two ranks and returns the prediction array.
    """
    # find underlying ranks
    pre_rank = approximate_rank(X1, t=t)
    post_rank = approximate_rank(X2, t=t)
    print(pre_rank, post_rank)

    # de-noise donor matrices
    X1_denoised = hsvt(X1, rank=pre_rank)
    X2_denoised = hsvt(X2, rank=post_rank)

    # learn synthetic control via linear regression
    weights = linear_regression(X1_denoised, y1, rcond=rcond)

    # forecast counterfactuals
    post_forecast = X2_denoised.dot(weights).T
    if include_pre:
        forecast = np.concatenate([X1_denoised.dot(weights).T, post_forecast])
    else:
        forecast = post_forecast

    # prediction intervals
    # NOTE(review): std is computed but neither used nor returned.
    std = np.sqrt(np.mean((X1 - X1_denoised)**2))
    return forecast
Beispiel #11
0
def hsvt_fit(controls,
             treated,
             T0,
             t=0.99,
             rcond=1e-15,
             include_pre=True,
             retbeta=True,
             verbose=False,
             combined=False):
    """De-noise the control matrix with hsvt and regress the treated series.

    controls    -- 2-D array; columns up to T0 form the first period and the
                   remaining columns the second (both are transposed
                   before use)
    treated     -- sequence whose first T0 entries are the regression target
    T0          -- split index between the two periods
    t           -- forwarded to approximate_rank
    rcond       -- forwarded to linear_regression
    include_pre -- when true, first-period fitted values are concatenated in
                   front of the second-period predictions
    retbeta     -- when true, return (predictions, beta), else predictions
    verbose     -- print the rank(s) that were selected
    combined    -- when true, de-noise the whole transposed control matrix
                   with one rank and split afterwards; otherwise de-noise
                   each period separately with its own rank
    """
    target = treated[:T0]

    # Both modes need the transposed period split
    X1 = controls[:, :T0].T
    X2 = controls[:, T0:].T

    if combined:
        full = controls.T
        rank = approximate_rank(full, t=t)
        denoised = hsvt(full, rank=rank)
        X1_hsvt, X2_hsvt = denoised[:T0, :], denoised[T0:, :]
        if verbose:
            print(rank)
    else:
        # find underlying ranks
        rank1 = approximate_rank(X1, t=t)
        rank2 = approximate_rank(X2, t=t)
        if verbose:
            print(rank1, rank2)
        # de-noise donor matrices
        X1_hsvt = hsvt(X1, rank=rank1)
        X2_hsvt = hsvt(X2, rank=rank2)

    # learn synthetic control via linear regression
    beta = linear_regression(X1_hsvt, target, rcond=rcond)

    # forecast counterfactuals
    post = X2_hsvt.dot(beta).T
    if include_pre:
        yh = np.concatenate([X1_hsvt.dot(beta).T, post])
    else:
        yh = post

    # prediction intervals
    # NOTE(review): std is computed but neither used nor returned.
    std = np.sqrt(np.mean((X1 - X1_hsvt)**2))
    return (yh, beta) if retbeta else yh
Beispiel #12
0
def run_lr_and_pla(N_samples, N_points):
    '''Feed the linear regression weights into PLA and average the iterations.

    For each of the N_samples runs a training set of N_points points is
    built, linear_regression is fitted on it and the resulting weight vector
    is passed to PLA. The iteration counts returned by PLA are reported
    through print_avg.
    '''
    print('running Linear Regression on %s samples' % N_samples)
    print('Each samples has %s data points' % N_points)

    iteration_counts = []
    for _ in range(N_samples):
        # data() is called before randomline(), as any shared random state
        # would make the order matter
        points = data(N_points)
        target = target_function(randomline())
        training_set = build_training_set(points, target)

        # only the weight vector of the fit is needed
        weights, _X, _y = linear_regression(N_points, training_set)

        _w_pla, n_iterations = PLA(N_points, weights, target, training_set)
        iteration_counts.append(n_iterations)

    print_avg('Number of iterations', iteration_counts)
Beispiel #13
0
def run_lr_and_pla(N_samples, N_points):
    '''Run linear regression followed by PLA, N_samples times.

    Every run fits linear_regression on a fresh training set of N_points
    points and hands the weights to PLA. The second value returned by PLA
    (the iteration count) is collected and averaged by print_avg.
    '''
    print('running Linear Regression on %s samples' %N_samples)
    print('Each samples has %s data points' %N_points)

    iterations = []
    run = 0
    while run < N_samples:
        sample = data(N_points)
        line = randomline()
        target = target_function(line)
        training_set = build_training_set(sample,target)

        fit = linear_regression(N_points,training_set)
        w_start,X,y = fit

        pla_result = PLA(N_points,w_start,target,training_set)
        w_final,count = pla_result
        iterations.append(count)
        run += 1

    print_avg('Number of iterations',iterations)
Beispiel #14
0
def run_validation(indata_train,indata_val,outdata):
    '''Print validation and out-of-sample error for k = 3..7.

    indata_train -- training data, transformed with transform_t_set(..., k)
    indata_val   -- validation data, transformed the same way
    outdata      -- out-of-sample data, transformed the same way

    For every k the weights fitted on the transformed training set are
    scored with compute_Eval on the validation set and with
    compute_Eout_from_data on the out-of-sample set. Returns nothing.
    '''
    ks = list(range(3,8))

    # training sets with transformation
    train_sets = {k: transform_t_set(indata_train,k) for k in ks}

    # linear regression: only the weight vector is kept
    weights = {}
    for k in ks:
        current = train_sets[k]
        w,_,_ = linear_regression(len(current),current)
        weights[k] = w

    # validation sets, then their target vector and input matrix
    val_sets = {k: transform_t_set(indata_val,k) for k in ks}
    val_targets = {}
    val_inputs = {}
    for k in ks:
        val_targets[k] = target_vector(val_sets[k])
        val_inputs[k] = input_data_matrix(val_sets[k])

    # Eval
    e_val = {k: compute_Eval(weights[k],val_inputs[k],val_targets[k])
             for k in ks}

    # Eout
    out_sets = {k: transform_t_set(outdata,k) for k in ks}
    e_out = {k: compute_Eout_from_data(weights[k],out_sets[k],len(out_sets[k]))
             for k in ks}

    for k in ks:
        print('Eval for k = %s is: %s'%(k,e_val[k]))
        print('Eout for k = %s is: %s'%(k,e_out[k]))
        print('')
Beispiel #15
0
def run_nonlinear_transformation(N_samples, N_points):
    '''Average errors of linear regression with and without a nonlinear
    feature transformation over N_samples runs.

    Each run:
      - generates a training set and target function via generate_t_set
      - fits linear regression on the plain set and records Ein
      - transforms the set with transform_t_set (per the inline comment:
        (1; x1; x2; x1x2; x1^2; x2^2)), fits again and records Ein
      - builds the hypothesis vector sign(dot(Xtrans, wtrans)) and, for each
        candidate function 'a'..'e', records 1 minus the value returned by
        compute_avg_difference against compute_g_vector
      - records Eout from compute_Eout_nonlineartrans, using the weights
        fitted on the transformed set

    All averages are reported through print_avg.
    '''
    candidates = [
        ('a', 'P of agreeing A'),
        ('b', 'P of agreeing B'),
        ('c', 'P of agreeing C'),
        ('d', 'P of agreeing D'),
        ('e', 'P of agreeing E'),
    ]

    Ein_avg = []
    Eout_avg = []
    Eintrans_avg = []
    agreement = dict((label, []) for label, _ in candidates)

    for _ in range(N_samples):

        t_set, f = generate_t_set(N_points)
        wlin, X, y = linear_regression(N_points, t_set)
        Ein_avg.append(compute_Ein(wlin, X, y))

        # transform the training data into the nonlinear feature vector
        t_set_trans = transform_t_set(t_set)
        wtrans, Xtrans, ytrans = linear_regression(N_points, t_set_trans)
        Eintrans_avg.append(compute_Ein(wtrans, Xtrans, ytrans))

        # compare the fitted hypothesis with every candidate function
        h_vector = sign(dot(Xtrans, wtrans))
        for label, _ in candidates:
            g_vector = compute_g_vector(t_set_trans, label)
            difference = compute_avg_difference(h_vector, g_vector)
            agreement[label].append(1 - difference)

        Eout_avg.append(compute_Eout_nonlineartrans(wtrans, f, N_points))

    print_avg('Ein', Ein_avg)
    print_avg('Ein Transformed', Eintrans_avg)
    for label, title in candidates:
        print_avg(title, agreement[label])
    print_avg('Eout', Eout_avg)
Beispiel #16
0
def run_nonlinear_transformation(N_samples, N_points):
    '''Compare plain and transformed linear regression over N_samples runs.

    For every run a training set and its target function come from
    generate_t_set(N_points). Linear regression is fitted on the plain set
    and on the set returned by transform_t_set; Ein is recorded for both.
    The hypothesis sign(dot(Xtrans, wtrans)) is compared with the candidate
    functions 'a' to 'e' (compute_g_vector), storing 1 minus the value of
    compute_avg_difference for each. Eout is computed by
    compute_Eout_nonlineartrans from the transformed weights.

    The averages of all recorded values are reported through print_avg.
    '''
    labels = ['a','b','c','d','e']
    titles = ['P of agreeing A','P of agreeing B','P of agreeing C',
              'P of agreeing D','P of agreeing E']

    ein_plain = []
    ein_trans = []
    eout = []
    agree = [[] for _ in labels]

    run = 0
    while run < N_samples:
        t_set,f = generate_t_set(N_points)
        w_plain,X,y = linear_regression(N_points,t_set)
        ein_plain.append(compute_Ein(w_plain,X,y))

        # nonlinear feature vector, see transform_t_set
        trans = transform_t_set(t_set)
        w_trans,X_trans,y_trans = linear_regression(N_points,trans)
        ein_trans.append(compute_Ein(w_trans,X_trans,y_trans))

        hypothesis = sign(dot(X_trans,w_trans))
        for bucket,label in zip(agree,labels):
            g = compute_g_vector(trans,label)
            bucket.append(1-compute_avg_difference(hypothesis,g))

        eout.append(compute_Eout_nonlineartrans(w_trans,f,N_points))
        run += 1

    print_avg('Ein',ein_plain)
    print_avg('Ein Transformed',ein_trans)
    for title,bucket in zip(titles,agree):
        print_avg(title,bucket)
    print_avg('Eout',eout)
Beispiel #17
0
def run_validation(indata_train, indata_val, outdata):
    '''Report Eval and Eout of linear regression for k = 3, 4, 5, 6, 7.

    Each of the three data sets is transformed with transform_t_set(data, k).
    Weights are fitted on the transformed training set; compute_Eval scores
    them on the validation set (via input_data_matrix and target_vector) and
    compute_Eout_from_data scores them on the out-of-sample set.
    Results are printed per k; nothing is returned.
    '''
    ks = list(range(3, 8))

    # t_set train with transformation
    train_sets = [transform_t_set(indata_train, k) for k in ks]

    # linear regression (only the weights are kept)
    weights = []
    for train_set in train_sets:
        w, _X, _y = linear_regression(len(train_set), train_set)
        weights.append(w)

    # t_set validation
    val_sets = [transform_t_set(indata_val, k) for k in ks]

    val_targets = []
    val_inputs = []
    for val_set in val_sets:
        val_targets.append(target_vector(val_set))
        val_inputs.append(input_data_matrix(val_set))

    # Eval
    e_vals = [
        compute_Eval(w, X_val, y_val)
        for w, X_val, y_val in zip(weights, val_inputs, val_targets)
    ]

    # Eout
    out_sets = [transform_t_set(outdata, k) for k in ks]
    e_outs = [
        compute_Eout_from_data(w, out_set, len(out_set))
        for w, out_set in zip(weights, out_sets)
    ]

    for k, e_val, e_out in zip(ks, e_vals, e_outs):
        print('Eval for k = %s is: %s' % (k, e_val))
        print('Eout for k = %s is: %s' % (k, e_out))
        print('')