Example #1
0
def greedyRLS(XPath, yPath, metaPath, fcount=5, scount=50, resultPath=None):
    """Pick a regularization parameter by cross-validation and train GreedyRLS.

    For every candidate ``regparam = 2**logrp`` (``logrp`` in 15..25) the
    training part of the data is split into ``fcount`` folds. One GreedyRLS
    learner selecting ``scount`` features is trained per fold, with a
    ``CallbackFunction`` built on the held-out fold; its ``perfs`` attribute
    is collected afterwards. The fold-averaged ``perfs`` decide the best
    ``logrp``, and a final learner is trained on the whole training part with
    a callback built on the hidden split.

    Parameters:
        XPath, yPath: handed to ``readAuto`` to load features and labels.
        metaPath: path handed to ``result.getMeta``; ``None`` skips loading.
        fcount: number of cross-validation folds.
        scount: value passed to the learner as ``subsetsize``.
        resultPath: currently unused (result saving is disabled).

    Returns:
        Tuple ``(model, perfs, selected, best_logrp, best_scount)``. ``perfs``
        is taken from the hidden-split callback; ``best_scount`` is the
        1-based argmax position of the fold-averaged ``perfs`` for the
        winning ``logrp``.
    """
    X, Y = readAuto(XPath, yPath)
    meta = {}
    if metaPath is not None:
        print("Loading metadata from %s" % (metaPath,))
        meta = result.getMeta(metaPath)
    X_train, X_hidden, Y_train, Y_hidden = hidden.split(X, Y, meta=meta)

    def fit(features, labels, logrp, cb):
        # Build and train one GreedyRLS learner with the shared settings.
        learner = GreedyRLS.createLearner(
            train_features=features,
            train_labels=labels,
            subsetsize=scount,
            regparam=2.**logrp,
            bias=1,
            callback_obj=cb)
        learner.train()
        return learner

    print("Training RLS")
    best_perf = -1
    best_logrp = None
    best_scount = None
    for logrp in range(15, 26):
        kf = KFold(len(Y_train), n_folds=fcount, indices=True, shuffle=True, random_state=77)
        # The list lives outside the fold loop so that the mean below really
        # averages over every fold instead of only the last one.
        fold_perfs = []
        # The fold counter restarts per logrp so the banner stays within 1..fcount.
        for fold_no, (train, test) in enumerate(kf, 1):
            print("------------ Processing fold %s/%s ------------" % (fold_no, fcount))
            cb = CallbackFunction(X_train[test], Y_train[test])
            fit(X_train[train], Y_train[train], logrp, cb)
            fold_perfs.append(cb.perfs)
            print("---------------------------------------------------")
        mean_perfs = np.mean(fold_perfs, axis=0)
        # A logrp is scored by the last entry of the averaged curve
        # (presumably the performance with all scount features selected).
        perf = mean_perfs[-1]
        sc = np.argmax(mean_perfs) + 1
        print("%f AUC, %d logrp, %d selected" % (perf, logrp, sc))
        if perf > best_perf:
            best_perf = perf
            best_logrp = logrp
            best_scount = sc

    # Retrain on the full training part with the winning regularization.
    cb = CallbackFunction(X_hidden, Y_hidden)
    rls = fit(X_train, Y_train, best_logrp, cb)
    perfs = cb.perfs
    selected = rls.selected
    model = rls.getModel()
    return model, perfs, selected, best_logrp, best_scount
Example #2
0
 def test_compare(self):
     """Check GreedyRLS against plain RLS trained on the selected columns.

     For every combination of the two training matrices and two label sets,
     a GreedyRLS learner (10 features, regparam 12) is compared with an RLS
     learner fitted on ``X[:, selected]``, first with ``bias=0.`` and then
     with ``bias=2.``.
     """
     feature_sets = [self.Xtrain1, self.Xtrain2]
     label_sets = [self.Ytrain1, self.Ytrain2]
     for X in feature_sets:
         for Y in label_sets:
             # Without bias: weights of selected features must match RLS,
             # every other weight and the bias term must be exactly zero.
             sparse = GreedyRLS(X, Y, subsetsize=10, regparam=12, bias=0.)
             chosen = sparse.selected
             unchosen = list(set(range(X.shape[1])).difference(chosen))
             dense = RLS(X[:, chosen], Y, regparam=12., bias=0.)
             assert_allclose(sparse.predictor.W[chosen], dense.predictor.W)
             assert_array_equal(sparse.predictor.W[unchosen], 0)
             assert_array_equal(sparse.predictor.b, 0)

             # With bias: weights and bias term must both agree with RLS.
             sparse = GreedyRLS(X, Y, subsetsize=10, regparam=12, bias=2.)
             chosen = sparse.selected
             dense = RLS(X[:, chosen], Y, regparam=12., bias=2.)
             assert_allclose(sparse.predictor.W[chosen], dense.predictor.W)
             assert_allclose(sparse.predictor.b, dense.predictor.b)
Example #3
0
def train_rls():
    """Fit GreedyRLS on the housing data (subset size 13) with a test-set callback.

    Prints the squared error on the test split and the learner's ``selected``
    attribute.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    monitor = Callback(X_test, Y_test)
    learner = GreedyRLS(X_train, Y_train, 13, callbackfun = monitor)
    # Evaluate on the test split
    test_error = sqerror(Y_test, learner.predict(X_test))
    print("test error %f" %test_error)
    chosen = str(learner.selected)
    print("Selected features " +chosen)
Example #4
0
def train_rls():
    """Select 5 features from the housing data with GreedyRLS and report results.

    Prints the squared error of the test-set predictions followed by the
    learner's ``selected`` attribute.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    # The third argument is the number of features to select
    n_features = 5
    learner = GreedyRLS(X_train, Y_train, n_features)
    # Predict the test split and measure the error against the true labels
    predictions = learner.predict(X_test)
    error = sqerror(Y_test, predictions)
    print("test error %f" %error)
    print("Selected features " +str(learner.selected))
Example #5
0
def train_rls():
    """Run GreedyRLS on the housing data, passing a callback built on the test split.

    The learner is created with subset size 13; afterwards the test-set
    squared error and the ``selected`` attribute are printed.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    learner = GreedyRLS(
        X_train,
        Y_train,
        13,
        callbackfun=Callback(X_test, Y_test),
    )
    # Score the predictions made for the test split
    P_test = learner.predict(X_test)
    err = sqerror(Y_test, P_test)
    print("test error %f" % err)
    selected_text = str(learner.selected)
    print("Selected features " + selected_text)
Example #6
0
def train_rls():
    """Train GreedyRLS (subset size 5) on the housing data and inspect its predictor.

    Predictions are made once through the learner and once through the
    predictor object taken from it; then the predictor's ``W`` and ``b``
    are printed.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    learner = GreedyRLS(X_train, Y_train, 5)
    # Predictions can be requested from the learner itself ...
    P_test = learner.predict(X_test)
    # ... or from the predictor object it exposes
    model = learner.predictor
    P_test = model.predict(X_test)
    # Coefficients of the predictor
    weights, offset = model.W, model.b
    n_coefficients = len(weights)
    print("number of coefficients %d" %n_coefficients)
    print("w-coefficients " +str(weights))
    print("bias term %f" %offset)
Example #7
0
def speedtest():
    """Run GreedyRLS on a random 3000 x 3000 problem and print the outcome.

    Builds a random feature matrix (features x examples) and a two-column
    random label matrix, trains GreedyRLS with subset size 5, regparam 1.0
    and bias 2.0, then prints ``selected``, ``A[selected]`` and ``b``.
    """
    tsize, fsize = 3000, 3000
    desiredfcount = 5
    Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
    bias = 2.
    rp = 1.
    ylen = 2
    Y = mat(random.rand(tsize, ylen), dtype=float64)

    class TestCallback(object):
        # Minimal progress reporter; presumably invoked once per selection round.
        def callback(self, learner):
            print('round')

        def finished(self, learner):
            pass

    rpool = {}
    rpool['callback'] = TestCallback()
    # The learner takes examples as rows, hence the transpose.
    rpool['X'] = Xtrain.T
    rpool['Y'] = Y
    # Pass the subset size as an integer: a string only works if the
    # constructor happens to coerce it, while an int is valid either way.
    rpool['subsetsize'] = desiredfcount
    rpool['regparam'] = rp
    rpool['bias'] = bias
    grls = GreedyRLS(**rpool)

    print(grls.selected)
    print(grls.A[grls.selected])
    print(grls.b)
Example #8
0
def train_rls():
    """Fit GreedyRLS with subset size 5 on the housing data and print its coefficients.

    Shows that predicting via the learner and via ``learner.predictor`` are
    both possible, then prints the length of ``W``, ``W`` itself and ``b``.
    """
    X_train, Y_train, X_test, Y_test = load_housing()
    learner = GreedyRLS(X_train, Y_train, 5)
    # Predict straight from the learner
    P_test = learner.predict(X_test)
    # Detach the predictor and repeat the same predictions with it
    predictor = learner.predictor
    P_test = predictor.predict(X_test)
    # Read out the coefficient vector and the bias term
    w = predictor.W
    b = predictor.b
    coefficient_count = len(w)
    print("number of coefficients %d" % coefficient_count)
    w_text = str(w)
    print("w-coefficients " + w_text)
    print("bias term %f" % b)
Example #9
0
def speedtest():
    """Train GreedyRLS on a random 3000 x 3000 problem and print the outcome.

    Builds a random feature matrix (features x examples) and a two-column
    random label matrix, creates the learner through
    ``GreedyRLS.createLearner`` with subset size 5, regparam 1.0 and bias
    2.0, trains it, then prints ``selected``, ``A[selected]`` and ``b``.
    """
    tsize, fsize = 3000, 3000
    desiredfcount = 5
    Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
    bias = 2.
    rp = 1.
    ylen = 2
    Y = mat(random.rand(tsize, ylen), dtype=float64)

    class TestCallback(CF):
        # Progress reporter; prints a marker each time the learner calls back.
        def callback(self, learner):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('round')

    rpool = {}
    rpool['callback'] = TestCallback()
    # The learner takes examples as rows, hence the transpose.
    rpool['train_features'] = Xtrain.T
    rpool['train_labels'] = Y

    rpool['subsetsize'] = str(desiredfcount)
    rpool['regparam'] = rp
    rpool['bias'] = bias
    grls = GreedyRLS.createLearner(**rpool)
    grls.train()

    print(grls.selected)
    print(grls.A[grls.selected])
    print(grls.b)
Example #10
0
def core_greedyrls(X, y, regparam, scount):
    """Run GreedyRLS without bias and return its ``selected`` attribute.

    ``scount`` is passed as the subset size and ``regparam`` as the
    regularization parameter; a default-constructed ``Callback`` is
    supplied as ``callbackfun``.
    """
    settings = dict(regparam=regparam, callbackfun=Callback(), bias=0.)
    return GreedyRLS(X, y, scount, **settings).selected
Example #11
0
def train_rls():
    """Run GreedyRLS on MNIST with one-vs-all labels and print ``selected``.

    Data is loaded from ``./data``; the learner is created with subset size
    50 and a callback built on the test split.
    """
    loader = MNIST("./data")
    train_images, train_digits = loader.load_training()
    test_images, test_digits = loader.load_testing()
    train_images = np.array(train_images)
    test_images = np.array(test_images)
    # Map the labels through ova() (one-vs-all encoding)
    train_targets = ova(train_digits)
    test_targets = ova(test_digits)
    # Train greedy RLS with subset size 50
    monitor = Callback(test_images, test_targets)
    learner = GreedyRLS(train_images, train_targets, 50, callbackfun=monitor)
    chosen = str(learner.selected)
    print("Selected features " + chosen)
Example #12
0
def speedtest():
    """Time-oriented smoke run of GreedyRLS on random 3000 x 3000 data.

    A random feature matrix (features x examples) and a two-column random
    label matrix are generated. The learner is built via
    ``GreedyRLS.createLearner`` (subset size 5, regparam 1.0, bias 2.0) and
    trained; ``selected``, ``A[selected]`` and ``b`` are printed at the end.
    """
    tsize, fsize = 3000, 3000
    desiredfcount = 5
    Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
    bias = 2.
    rp = 1.
    ylen = 2
    Y = mat(random.rand(tsize, ylen), dtype=float64)

    class TestCallback(CF):
        # Prints a marker whenever the learner invokes the callback.
        def callback(self, learner):
            # print(...) with one argument is identical on Python 2 and 3.
            print('round')

    rpool = {
        'callback': TestCallback(),
        # Examples are expected as rows, so the matrix is transposed.
        'train_features': Xtrain.T,
        'train_labels': Y,
        'subsetsize': str(desiredfcount),
        'regparam': rp,
        'bias': bias,
    }
    grls = GreedyRLS.createLearner(**rpool)
    grls.train()

    print(grls.selected)
    print(grls.A[grls.selected])
    print(grls.b)
Example #13
0
    def testRLS(self):
        """Compare GreedyRLS with a brute-force greedy leave-one-out reference.

        The reference adds, one at a time, the feature whose inclusion gives
        the smallest summed leave-one-out squared error, computed by
        explicitly refitting with each example held out. GreedyRLS must pick
        the same features and produce matching coefficients and bias.
        """
        print("\n\n\n\nTesting the correctness of the GreedyRLS module.\n\n")
        tsize, fsize = 10, 30
        desiredfcount = 5
        Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
        bias = 2.
        # Constant row scaled by sqrt(bias); it becomes row index fsize below
        bias_slice = sqrt(bias) * mat(ones(
            (1, Xtrain.shape[1]), dtype=float64))
        Xtrain_biased = vstack([Xtrain, bias_slice])
        ylen = 2
        Y = mat(random.rand(tsize, ylen), dtype=float64)
        rp = 1.
        selected = []
        for _ in range(desiredfcount):
            selected_plus_bias = selected + [fsize]
            bestlooperf = 9999999999.
            for ci in range(fsize):
                if ci in selected_plus_bias:
                    continue
                # Kernel matrix over the current selection plus candidate ci
                candidate_rows = Xtrain_biased[selected_plus_bias + [ci]]
                updK = candidate_rows.T * candidate_rows
                looperf = 0.
                for hi in range(tsize):
                    # Refit without example hi, then predict it
                    hoinds = [ti for ti in range(tsize) if ti != hi]
                    updcutK = updK[ix_(hoinds, hoinds)]
                    updcrossK = updK[ix_([hi], hoinds)]
                    regularized = updcutK + rp * mat(eye(tsize - 1))
                    loopred = updcrossK * la.inv(regularized) * Y[hoinds]
                    residual = loopred - Y[hi]
                    looperf += mean(multiply(residual, loopred - Y[hi]))
                if looperf < bestlooperf:
                    bestcind = ci
                    bestlooperf = looperf
                print('Tester ', ci, looperf)
            selected.append(bestcind)
            print('Tester ', selected)
        # Reference coefficients for the final selection (bias row last)
        final_rows = Xtrain_biased[selected + [fsize]]
        K = final_rows.T * final_rows
        G = la.inv(K + rp * mat(eye(tsize)))
        A = final_rows * G * Y
        print('Tester ', A)

        class TestCallback(object):
            def callback(self, learner):
                print('GreedyRLS', learner.looperf.T)

            def finished(self, learner):
                pass

        rpool = {
            'callback': TestCallback(),
            'X': Xtrain.T,
            'Y': Y,
            'subsetsize': desiredfcount,
            'regparam': rp,
            'bias': bias,
        }
        grls = GreedyRLS(**rpool)
        assert_array_equal(selected, grls.selected)
        assert_allclose(A[:-1], grls.A[selected])
        assert_allclose(np.sqrt(bias) * A[-1], grls.b)
Example #14
0
    def testRLS(self):
        """Print a brute-force greedy leave-one-out reference next to GreedyRLS.

        The reference repeatedly adds the feature that yields the smallest
        summed leave-one-out squared error, obtained by explicitly refitting
        with each example held out, and prints its selection and
        coefficients. GreedyRLS is then trained on the same data and its
        ``selected``, ``A[selected]`` and ``b`` are printed.

        NOTE(review): nothing is asserted here; the two outputs are only
        printed for manual comparison.
        """
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("\n\n\n\nTesting the correctness of the GreedyRLS module.\n\n")

        tsize, fsize = 10, 30
        desiredfcount = 5
        Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
        bias = 2.
        # Constant row scaled by sqrt(bias); it becomes row index fsize below.
        bias_slice = sqrt(bias) * mat(ones(
            (1, Xtrain.shape[1]), dtype=float64))
        Xtrain_biased = vstack([Xtrain, bias_slice])
        ylen = 2
        Y = mat(random.rand(tsize, ylen), dtype=float64)

        selected = []

        rp = 1.
        currentfcount = 0
        while currentfcount < desiredfcount:

            selected_plus_bias = selected + [fsize]
            bestlooperf = 9999999999.

            for ci in range(fsize):
                if ci in selected_plus_bias:
                    continue
                # Kernel matrix over the current selection plus candidate ci.
                rows = Xtrain_biased[selected_plus_bias + [ci]]
                updK = rows.T * rows
                looperf = 0.
                for hi in range(tsize):
                    # Hold out example hi (list form works on Python 2 and 3).
                    hoinds = list(range(0, hi)) + list(range(hi + 1, tsize))
                    updcutK = updK[ix_(hoinds, hoinds)]
                    updcrossK = updK[ix_([hi], hoinds)]
                    loopred = updcrossK * la.inv(
                        updcutK + rp * mat(eye(tsize - 1))) * Y[hoinds]
                    looperf += mean(
                        multiply((loopred - Y[hi]), (loopred - Y[hi])))
                if looperf < bestlooperf:
                    bestcind = ci
                    bestlooperf = looperf
                print('Tester  %s %s' % (ci, looperf))

            selected.append(bestcind)
            print('Tester  %s' % (selected,))
            currentfcount += 1

        # Reference coefficients for the final selection (bias row last).
        selected_plus_bias = selected + [fsize]
        K = Xtrain_biased[selected_plus_bias].T * Xtrain_biased[
            selected_plus_bias]
        G = la.inv(K + rp * mat(eye(tsize)))
        A = Xtrain_biased[selected_plus_bias] * G * Y
        print('Tester  %s' % (A,))

        rpool = {}

        class TestCallback(CF):
            def callback(self, learner):
                print('GreedyRLS %s' % (learner.looperf.T,))

        tcb = TestCallback()
        rpool['callback'] = tcb
        rpool['train_features'] = Xtrain.T
        rpool['train_labels'] = Y

        rpool['subsetsize'] = str(desiredfcount)
        rpool['regparam'] = rp
        rpool['bias'] = bias
        grls = GreedyRLS.createLearner(**rpool)
        grls.train()
        print(grls.selected)
        print(grls.A[grls.selected])
        print(grls.b)
Example #15
0
 def testRLS(self):
     """Print a brute-force greedy leave-one-out reference next to GreedyRLS.

     The reference grows the feature set one feature at a time, always
     taking the candidate with the smallest summed leave-one-out squared
     error (computed by explicit refitting with each example held out), and
     prints its selection and coefficients. GreedyRLS is then trained on the
     same data and its ``selected``, ``A[selected]`` and ``b`` are printed.

     NOTE(review): nothing is asserted here; the two outputs are only
     printed for manual comparison.
     """
     # Single-argument print(...) behaves the same on Python 2 and 3.
     print("\n\n\n\nTesting the correctness of the GreedyRLS module.\n\n")

     tsize, fsize = 10, 30
     desiredfcount = 5
     Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
     bias = 2.
     # Constant row scaled by sqrt(bias); it becomes row index fsize below.
     bias_slice = sqrt(bias)*mat(ones((1,Xtrain.shape[1]), dtype=float64))
     Xtrain_biased = vstack([Xtrain,bias_slice])
     ylen = 2
     Y = mat(random.rand(tsize, ylen), dtype=float64)

     selected = []

     rp = 1.
     currentfcount=0
     while currentfcount < desiredfcount:

         selected_plus_bias = selected + [fsize]
         bestlooperf = 9999999999.

         for ci in range(fsize):
             if ci in selected_plus_bias:
                 continue
             # Kernel matrix over the current selection plus candidate ci.
             candidate = Xtrain_biased[selected_plus_bias+[ci]]
             updK = candidate.T*candidate
             looperf = 0.
             for hi in range(tsize):
                 # Hold out example hi (list form works on Python 2 and 3).
                 hoinds = list(range(0, hi)) + list(range(hi + 1, tsize))
                 updcutK = updK[ix_(hoinds, hoinds)]
                 updcrossK = updK[ix_([hi], hoinds)]
                 loopred = updcrossK * la.inv(updcutK + rp * mat(eye(tsize-1))) * Y[hoinds]
                 looperf += mean(multiply((loopred - Y[hi]), (loopred - Y[hi])))
             if looperf < bestlooperf:
                 bestcind = ci
                 bestlooperf = looperf
             print('Tester  %s %s' % (ci, looperf))

         selected.append(bestcind)
         print('Tester  %s' % (selected,))
         currentfcount += 1

     # Reference coefficients for the final selection (bias row last).
     selected_plus_bias = selected + [fsize]
     K = Xtrain_biased[selected_plus_bias].T*Xtrain_biased[selected_plus_bias]
     G = la.inv(K+rp * mat(eye(tsize)))
     A = Xtrain_biased[selected_plus_bias]*G*Y
     print('Tester  %s' % (A,))

     rpool = {}
     class TestCallback(CF):
         def callback(self, learner):
             print('GreedyRLS %s' % (learner.looperf.T,))
     tcb = TestCallback()
     rpool['callback'] = tcb
     rpool['train_features'] = Xtrain.T
     rpool['train_labels'] = Y

     rpool['subsetsize'] = str(desiredfcount)
     rpool['regparam'] = rp
     rpool['bias'] = bias
     grls = GreedyRLS.createLearner(**rpool)
     grls.train()
     print(grls.selected)
     print(grls.A[grls.selected])
     print(grls.b)