def greedyRLS(XPath, yPath, metaPath, fcount=5, scount=50, resultPath=None):
    """Pick a GreedyRLS regularization parameter by cross-validation, then
    train a final model on the training split and score it on the hidden split.

    For every exponent ``logrp`` in 15..25 the training split is divided into
    ``fcount`` folds; GreedyRLS is trained on each fold's training part with
    ``regparam = 2 ** logrp`` while a ``CallbackFunction`` records performance
    on the held-out part.  The per-fold performance curves are averaged and
    the exponent with the best averaged final value wins.

    Parameters:
        XPath, yPath: locations handed to ``readAuto`` to load features/labels.
        metaPath: optional metadata location; when not ``None`` it is loaded
            with ``result.getMeta`` and passed on to ``hidden.split``.
        fcount: number of cross-validation folds.
        scount: ``subsetsize`` given to GreedyRLS (number of features to pick).
        resultPath: accepted for interface compatibility; currently unused
            because saving of results is disabled in this function.

    Returns:
        ``(model, perfs, selected, best_logrp, best_scount)`` where ``perfs``
        is the callback's record for the final model on the hidden split,
        ``selected`` is ``rls.selected`` of the final learner, and
        ``best_scount`` is the 1-based argmax of the averaged curve for the
        winning ``logrp``.
    """

    def run_learner(features, labels, eval_features, eval_labels, logrp):
        # Train one GreedyRLS learner; the callback scores it on the
        # evaluation data while training runs.
        callback = CallbackFunction(eval_features, eval_labels)
        learner = GreedyRLS.createLearner(
            train_features=features,
            train_labels=labels,
            subsetsize=scount,
            regparam=2. ** logrp,
            bias=1,
            callback_obj=callback)
        learner.train()
        return learner, callback

    X, Y = readAuto(XPath, yPath)
    meta = {}
    if metaPath is not None:
        print("Loading metadata from %s" % (metaPath,))
        meta = result.getMeta(metaPath)
    X_train, X_hidden, Y_train, Y_hidden = hidden.split(X, Y, meta=meta)

    print("Training RLS")
    best_perf = -1
    best_logrp = None
    best_scount = None
    for logrp in range(15, 26):
        kf = KFold(len(Y_train), n_folds=fcount, indices=True, shuffle=True,
                   random_state=77)
        # Collected across ALL folds of this logrp.  (Previously the list was
        # re-created inside the fold loop, so only the last fold was averaged.)
        fold_perfs = []
        # The fold counter restarts for every logrp so the label stays within
        # 1..fcount.
        for fold_number, (train, test) in enumerate(kf, 1):
            print("------------ Processing fold %s/%s ------------"
                  % (fold_number, fcount))
            _, callback = run_learner(X_train[train], Y_train[train],
                                      X_train[test], Y_train[test], logrp)
            fold_perfs.append(callback.perfs)
            print("---------------------------------------------------")
        mean_perfs = np.mean(fold_perfs, axis=0)
        # Model selection uses the last entry of the averaged curve, exactly
        # as the original code effectively did.
        # NOTE(review): best_scount is the argmax of the curve while perf is
        # its last value - confirm this mix is intended.
        perf = mean_perfs[-1]
        sc = np.argmax(mean_perfs) + 1
        print("%f AUC, %d logrp, %d selected" % (perf, logrp, sc))
        if perf > best_perf:
            best_perf = perf
            best_logrp = logrp
            best_scount = sc

    # Final model: all training data, evaluated on the hidden split.
    rls, callback = run_learner(X_train, Y_train, X_hidden, Y_hidden,
                                best_logrp)
    perfs = callback.perfs
    selected = rls.selected
    model = rls.getModel()
    return model, perfs, selected, best_logrp, best_scount
def speedtest():
    """Train GreedyRLS once on a random 3000 x 3000 problem and print the result.

    Builds a random feature matrix (``fsize`` rows by ``tsize`` columns) and a
    random two-column label matrix, asks GreedyRLS for 5 features, and prints
    ``selected``, the matching rows of ``A`` and ``b`` of the trained learner.
    The callback prints ``'round'`` every time the learner invokes it.

    Takes no arguments and returns nothing; all output goes to stdout.
    """
    tsize, fsize = 3000, 3000
    desiredfcount = 5
    bias = 2.
    rp = 1.
    ylen = 2

    # Draw order (features first, then labels) is kept so a seeded run
    # produces the same data as before.  The explicitly biased copy of the
    # feature matrix that used to be built here was never used and has been
    # dropped; the bias is handed to the learner through the 'bias' parameter.
    Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
    Y = mat(random.rand(tsize, ylen), dtype=float64)

    class TestCallback(CF):
        def callback(self, learner):
            print('round')

    rpool = {
        'callback': TestCallback(),
        # The learner receives examples as rows, hence the transpose.
        'train_features': Xtrain.T,
        'train_labels': Y,
        # Passed as a string, as before.
        # NOTE(review): confirm createLearner expects a str here.
        'subsetsize': str(desiredfcount),
        'regparam': rp,
        'bias': bias,
    }
    grls = GreedyRLS.createLearner(**rpool)
    grls.train()
    print(grls.selected)
    print(grls.A[grls.selected])
    print(grls.b)
def speedtest():
    """Train GreedyRLS once on a random 3000 x 3000 problem and print the result.

    Builds a random feature matrix (``fsize`` rows by ``tsize`` columns) and a
    random two-column label matrix, asks GreedyRLS for 5 features, and prints
    ``selected``, the matching rows of ``A`` and ``b`` of the trained learner.
    The callback prints ``'round'`` every time the learner invokes it.

    Takes no arguments and returns nothing; all output goes to stdout.
    """
    tsize, fsize = 3000, 3000
    desiredfcount = 5
    bias = 2.
    rp = 1.
    ylen = 2

    # Draw order (features first, then labels) is kept so a seeded run
    # produces the same data as before.  The explicitly biased copy of the
    # feature matrix that used to be built here was never used and has been
    # dropped; the bias is handed to the learner through the 'bias' parameter.
    Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
    Y = mat(random.rand(tsize, ylen), dtype=float64)

    class TestCallback(CF):
        def callback(self, learner):
            print('round')

    rpool = {
        'callback': TestCallback(),
        # The learner receives examples as rows, hence the transpose.
        'train_features': Xtrain.T,
        'train_labels': Y,
        # Passed as a string, as before.
        # NOTE(review): confirm createLearner expects a str here.
        'subsetsize': str(desiredfcount),
        'regparam': rp,
        'bias': bias,
    }
    grls = GreedyRLS.createLearner(**rpool)
    grls.train()
    print(grls.selected)
    print(grls.A[grls.selected])
    print(grls.b)
def testRLS(self):
    """Print a brute-force greedy feature selection next to GreedyRLS's result.

    Reference procedure ("Tester" lines): starting from only the bias row,
    repeatedly add the feature whose inclusion gives the smallest
    leave-one-out squared error, where the error is obtained by explicitly
    holding out every training example and solving the regularized system
    on the remaining ones.  After ``desiredfcount`` features are chosen the
    coefficient matrix ``A`` for the chosen rows is printed.

    GreedyRLS is then trained on the same data ("GreedyRLS" lines come from
    its callback) and its ``selected``, ``A[selected]`` and ``b`` are printed.

    Nothing is asserted: the two outputs are meant to be compared by reading
    them.  ``self`` is not used.
    """
    # Same blank-line framing around the banner as before: 4 above, 2 below.
    for _ in range(4):
        print("")
    print("Testing the correctness of the GreedyRLS module.")
    for _ in range(2):
        print("")

    tsize, fsize = 10, 30
    desiredfcount = 5
    bias = 2.
    rp = 1.
    ylen = 2

    # Random draws stay in the original order: features first, then labels.
    Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
    # Row index fsize of the biased matrix is the constant bias feature.
    bias_slice = sqrt(bias) * mat(ones((1, Xtrain.shape[1]), dtype=float64))
    Xtrain_biased = vstack([Xtrain, bias_slice])
    Y = mat(random.rand(tsize, ylen), dtype=float64)

    # Regularization term for the systems with one example held out;
    # it does not change between iterations, so build it once.
    holdout_ridge = rp * mat(eye(tsize - 1))

    selected = []
    while len(selected) < desiredfcount:
        selected_plus_bias = selected + [fsize]
        bestlooperf = 9999999999.
        for ci in range(fsize):
            if ci in selected_plus_bias:
                continue
            # Kernel over the training examples using the already selected
            # rows, the bias row and candidate row ci.
            rows = Xtrain_biased[selected_plus_bias + [ci]]
            updK = rows.T * rows
            looperf = 0.
            for hi in range(tsize):
                # Hold out example hi, fit on the rest, predict hi.
                hoinds = [i for i in range(tsize) if i != hi]
                updcutK = updK[ix_(hoinds, hoinds)]
                updcrossK = updK[ix_([hi], hoinds)]
                loopred = updcrossK * la.inv(updcutK + holdout_ridge) * Y[hoinds]
                loodiff = loopred - Y[hi]
                looperf += mean(multiply(loodiff, loodiff))
            if looperf < bestlooperf:
                bestcind = ci
                bestlooperf = looperf
            print("%s %s %s" % ('Tester ', ci, looperf))
        selected.append(bestcind)
        print("%s %s" % ('Tester ', selected))

    # Coefficients of the reference solution on the final feature set.
    selected_plus_bias = selected + [fsize]
    rows = Xtrain_biased[selected_plus_bias]
    K = rows.T * rows
    G = la.inv(K + rp * mat(eye(tsize)))
    A = rows * G * Y
    print("%s %s" % ('Tester ', A))

    class TestCallback(CF):
        def callback(self, learner):
            print("%s %s" % ('GreedyRLS', learner.looperf.T))

    rpool = {
        'callback': TestCallback(),
        # The learner receives examples as rows, hence the transpose.
        'train_features': Xtrain.T,
        'train_labels': Y,
        # Passed as a string, as before.
        # NOTE(review): confirm createLearner expects a str here.
        'subsetsize': str(desiredfcount),
        'regparam': rp,
        'bias': bias,
    }
    grls = GreedyRLS.createLearner(**rpool)
    grls.train()
    print(grls.selected)
    print(grls.A[grls.selected])
    print(grls.b)
def testRLS(self):
    """Print a brute-force greedy feature selection next to GreedyRLS's result.

    Reference procedure ("Tester" lines): starting from only the bias row,
    repeatedly add the feature whose inclusion gives the smallest
    leave-one-out squared error, where the error is obtained by explicitly
    holding out every training example and solving the regularized system
    on the remaining ones.  After ``desiredfcount`` features are chosen the
    coefficient matrix ``A`` for the chosen rows is printed.

    GreedyRLS is then trained on the same data ("GreedyRLS" lines come from
    its callback) and its ``selected``, ``A[selected]`` and ``b`` are printed.

    Nothing is asserted: the two outputs are meant to be compared by reading
    them.  ``self`` is not used.
    """
    # Same blank-line framing around the banner as before: 4 above, 2 below.
    for _ in range(4):
        print("")
    print("Testing the correctness of the GreedyRLS module.")
    for _ in range(2):
        print("")

    tsize, fsize = 10, 30
    desiredfcount = 5
    bias = 2.
    rp = 1.
    ylen = 2

    # Random draws stay in the original order: features first, then labels.
    Xtrain = mat(random.rand(fsize, tsize), dtype=float64)
    # Row index fsize of the biased matrix is the constant bias feature.
    bias_slice = sqrt(bias) * mat(ones((1, Xtrain.shape[1]), dtype=float64))
    Xtrain_biased = vstack([Xtrain, bias_slice])
    Y = mat(random.rand(tsize, ylen), dtype=float64)

    # Regularization term for the systems with one example held out;
    # it does not change between iterations, so build it once.
    holdout_ridge = rp * mat(eye(tsize - 1))

    selected = []
    while len(selected) < desiredfcount:
        selected_plus_bias = selected + [fsize]
        bestlooperf = 9999999999.
        for ci in range(fsize):
            if ci in selected_plus_bias:
                continue
            # Kernel over the training examples using the already selected
            # rows, the bias row and candidate row ci.
            rows = Xtrain_biased[selected_plus_bias + [ci]]
            updK = rows.T * rows
            looperf = 0.
            for hi in range(tsize):
                # Hold out example hi, fit on the rest, predict hi.
                hoinds = [i for i in range(tsize) if i != hi]
                updcutK = updK[ix_(hoinds, hoinds)]
                updcrossK = updK[ix_([hi], hoinds)]
                loopred = updcrossK * la.inv(updcutK + holdout_ridge) * Y[hoinds]
                loodiff = loopred - Y[hi]
                looperf += mean(multiply(loodiff, loodiff))
            if looperf < bestlooperf:
                bestcind = ci
                bestlooperf = looperf
            print("%s %s %s" % ('Tester ', ci, looperf))
        selected.append(bestcind)
        print("%s %s" % ('Tester ', selected))

    # Coefficients of the reference solution on the final feature set.
    selected_plus_bias = selected + [fsize]
    rows = Xtrain_biased[selected_plus_bias]
    K = rows.T * rows
    G = la.inv(K + rp * mat(eye(tsize)))
    A = rows * G * Y
    print("%s %s" % ('Tester ', A))

    class TestCallback(CF):
        def callback(self, learner):
            print("%s %s" % ('GreedyRLS', learner.looperf.T))

    rpool = {
        'callback': TestCallback(),
        # The learner receives examples as rows, hence the transpose.
        'train_features': Xtrain.T,
        'train_labels': Y,
        # Passed as a string, as before.
        # NOTE(review): confirm createLearner expects a str here.
        'subsetsize': str(desiredfcount),
        'regparam': rp,
        'bias': bias,
    }
    grls = GreedyRLS.createLearner(**rpool)
    grls.train()
    print(grls.selected)
    print(grls.A[grls.selected])
    print(grls.b)