def testModel(self):
    Y = np.random.random(10)
    X = np.random.random((10, 100))
    kwargs = {}
    kwargs["Y"] = Y
    kwargs["X"] = X
    kwargs["regparam"] = 1
    learner = RLS(**kwargs)
    model = learner.predictor
    # Ten data points, single label
    model = mod.LinearPredictor(np.random.random(100))
    self.all_pred_cases(model)
    model = mod.LinearPredictor(np.random.random((100, 2)))
    self.all_pred_cases(model)
    #model = mod.LinearPredictor(np.random.random((1, 2)))
    #self.all_pred_cases(model)
    kwargs["kernel"] = "GaussianKernel"
    Y = np.random.random(10)
    kwargs["Y"] = Y
    learner = RLS(**kwargs)
    model = learner.predictor
    self.all_pred_cases(model)
    Y = np.random.random((10, 2))
    kwargs["Y"] = Y
    learner = RLS(**kwargs)
    model = learner.predictor
    self.all_pred_cases(model)
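# For reference, a minimal self-contained sketch of what the predictors
# exercised above reduce to: predict(X) = X W + b. TinyLinearPredictor is a
# hypothetical stand-in for illustration, not the real predictor.LinearPredictor.
import numpy as np

class TinyLinearPredictor(object):
    def __init__(self, W, b=0.0):
        self.W = np.asarray(W)
        self.b = b

    def predict(self, X):
        # One column of predictions per output dimension of W.
        return np.dot(np.asarray(X), self.W) + self.b

m = TinyLinearPredictor(np.random.random(100))
print(m.predict(np.random.random((10, 100))).shape)  # (10,)
m = TinyLinearPredictor(np.random.random((100, 2)))
print(m.predict(np.random.random((10, 100))).shape)  # (10, 2)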
def callback(self, learner):
    m = predictor.LinearPredictor(learner.A, learner.b)
    P = m.predict(self.X_valid)
    if self.qids_valid:
        # Average the performance over the validation queries, skipping
        # queries for which the measure is undefined.
        perfs = []
        for query in self.qids_valid:
            try:
                perf = self.measure(self.Y_valid[query], P[query])
                perfs.append(perf)
            except UndefinedPerformance:
                pass
        perf = np.mean(perfs)
    else:
        perf = self.measure(self.Y_valid, P)
    if self.bestperf is None or (self.measure.iserror == (perf < self.bestperf)):
        self.bestperf = perf
        self.bestA = learner.A
        self.last_update = 0
    else:
        self.iter += 1
        self.last_update += 1
        if self.last_update == self.maxiter:
            # No improvement for maxiter rounds: restore the best
            # coefficients and stop the optimization early.
            learner.A = np.mat(self.bestA)
            raise Finished("Done")
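# The test `measure.iserror == (perf < best)` above is a compact
# "did we improve?" check. An equivalent plain form (hypothetical helper,
# for illustration only):
def improved(perf, best, iserror):
    # Error measures: strictly lower is better. Gain measures: anything not
    # strictly lower counts as an improvement (ties included), which is
    # exactly what `iserror == (perf < best)` evaluates to.
    return (perf < best) if iserror else (perf >= best)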
def createModel(self, svdlearner):
    A = svdlearner.A
    A = self.reducedSetTransformation(A)
    fs = self.X
    if self.basis_vectors is not None:
        fs = self.basis_vectors
    bias = self.bias
    X = getPrimalDataMatrix(fs, bias)
    # The hyperplane is a linear combination of the feature vectors of
    # the basis examples.
    W = np.dot(X.T, A)
    if bias != 0:
        # The last row of W corresponds to the constant sqrt(bias) feature.
        W_bias = W[-1] * math.sqrt(bias)
        W_features = W[:-1]
        mod = predictor.LinearPredictor(W_features, W_bias)
    else:
        mod = predictor.LinearPredictor(W, 0.)
    return mod
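# A self-contained numeric check (hypothetical data) of the bias-column trick
# used above: augmenting each example with a constant sqrt(bias) feature means
# the unaugmented model's intercept is sqrt(bias) times the last learned
# weight, which is what the W_features / W_bias split relies on.
import numpy as np

bias = 2.0
X = np.random.random((6, 3))
X_aug = np.hstack([X, np.sqrt(bias) * np.ones((6, 1))])
w_aug = np.random.random(4)  # any weight vector learned on the augmented data
w, b = w_aug[:3], np.sqrt(bias) * w_aug[3]
assert np.allclose(X_aug.dot(w_aug), X.dot(w) + b)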
def __init__(self, X, Y, regparam=1.0, bias=1.0, callbackfun=None, **kwargs):
    self.Y = array_tools.as_2d_array(Y)
    self.X = csc_matrix(X.T)
    self.bias = bias
    self.regparam = regparam
    if self.bias != 0.:
        # Append a constant sqrt(bias) feature so that the bias term is
        # learned together with the weights.
        bias_slice = np.sqrt(self.bias) * np.mat(
            np.ones((1, self.X.shape[1]), dtype=np.float64))
        self.X = sparse.vstack([self.X, bias_slice]).tocsc()
    self.X_csr = self.X.tocsr()
    self.callbackfun = callbackfun
    self.results = {}
    regparam = self.regparam
    Y = self.Y
    X = self.X
    X_csr = self.X_csr

    def mv(v):
        # Matrix-vector product of the dual system: (X^T X + regparam * I) v.
        return X.T * (X_csr * v) + regparam * v

    G = LinearOperator((X.shape[1], X.shape[1]), matvec=mv, dtype=np.float64)
    self.AA = []
    if self.callbackfun is not None:
        def cb(v):
            self.A = np.mat(v).T
            self.callbackfun.callback(self)
    else:
        cb = None
    try:
        self.A = np.mat(cg(G, Y, callback=cb)[0]).T
    except Finished:
        # The callback may stop the optimization early.
        pass
    if self.callbackfun is not None:
        self.callbackfun.finished(self)
    # Map the dual coefficients to primal weights.
    self.A = X_csr * self.A
    if self.bias == 0.:
        self.b = np.mat(np.zeros((1, 1)))
    else:
        self.b = np.sqrt(self.bias) * self.A[-1]
        self.A = self.A[:-1]
    self.predictor = predictor.LinearPredictor(self.A, self.b)
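# A minimal usage sketch for the conjugate-gradient RLS trainer above.
# Hypothetical data; the import path assumes the RLScore package layout,
# i.e. that this __init__ belongs to rlscore.learner.CGRLS.
import numpy as np
from rlscore.learner import CGRLS

X = np.random.random((50, 20))
Y = np.random.random(50)
learner = CGRLS(X, Y, regparam=1.0)
P = learner.predictor.predict(X)  # one prediction per training example
print(P.shape)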
def __init__(self, X, train_preferences, regparam=1., **kwargs):
    self.regparam = regparam
    self.callbackfun = None
    self.pairs = train_preferences
    self.X = csc_matrix(X.T)
    regparam = self.regparam
    X = self.X.tocsc()
    X_csr = X.tocsr()
    # Encode the preference pairs as a sparse incidence matrix: +1 for the
    # preferred example, -1 for the other one.
    vals = np.concatenate([
        np.ones(self.pairs.shape[0], dtype=np.float64),
        -np.ones(self.pairs.shape[0], dtype=np.float64)])
    row = np.concatenate(
        [np.arange(self.pairs.shape[0]), np.arange(self.pairs.shape[0])])
    col = np.concatenate([self.pairs[:, 0], self.pairs[:, 1]])
    coo = coo_matrix((vals, (row, col)),
                     shape=(self.pairs.shape[0], X.shape[1]))
    pairs_csr = coo.tocsr()
    pairs_csc = coo.tocsc()

    def mv(v):
        # (X D^T D X^T + regparam * I) v, where D is the pair incidence matrix.
        vmat = np.mat(v).T
        ret = np.array(X_csr * (pairs_csc.T * (pairs_csr *
            (X.T * vmat)))) + regparam * vmat
        return ret

    G = LinearOperator((X.shape[0], X.shape[0]), matvec=mv, dtype=np.float64)
    self.As = []
    M = np.mat(np.ones((self.pairs.shape[0], 1)))
    if self.callbackfun is not None:
        def cb(v):
            self.A = np.mat(v).T
            self.b = np.mat(np.zeros((1, 1)))
            self.callbackfun.callback(self)
    else:
        cb = None
    XLY = X_csr * (pairs_csc.T * M)
    self.A = np.mat(cg(G, XLY, callback=cb)[0]).T
    self.b = np.mat(np.zeros((1, self.A.shape[1])))
    self.predictor = predictor.LinearPredictor(self.A, self.b)
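# A usage sketch for the pairwise-preference trainer above. Each row of
# train_preferences reads "the example indexed by column 0 should score higher
# than the example indexed by column 1". Hypothetical data; the import path
# assumes this __init__ belongs to rlscore.learner.PCGRankRLS.
import numpy as np
from rlscore.learner import PCGRankRLS

X = np.random.random((30, 10))
pairs = np.array([[0, 1], [2, 3], [5, 4]])  # 0 > 1, 2 > 3, 5 > 4
learner = PCGRankRLS(X, pairs, regparam=1.0)
scores = learner.predictor.predict(X)  # higher score = more preferred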
def callback(self, learner):
    A = learner.A
    b = learner.bias
    # Recover the current primal weights and bias from the learner's dual
    # coefficients, mirroring the end of CGRLS training.
    A = learner.X_csr * A
    if b == 0:
        b = np.mat(np.zeros((1, 1)))
    else:
        b = np.sqrt(b) * A[-1]
        A = A[:-1]
    m = predictor.LinearPredictor(A, b)
    P = m.predict(self.X_valid)
    perf = self.measure(self.Y_valid, P)
    if self.bestperf is None or (self.measure.iserror == (perf < self.bestperf)):
        self.bestperf = perf
        self.bestA = learner.A
        self.last_update = 0
    else:
        self.iter += 1
        self.last_update += 1
        if self.last_update == self.maxiter:
            # No improvement for maxiter rounds: restore the best
            # coefficients and stop early.
            learner.A = np.mat(self.bestA)
            raise Finished("Done")
def _solve_bu(self, regparam):
    self.regparam = regparam
    X = self.X
    Y = self.Y
    bias_slice = np.sqrt(self.bias) * np.mat(
        np.ones((1, X.shape[1]), dtype=np.float64))
    tsize = self.size
    fsize = X.shape[0]
    assert X.shape[1] == tsize
    self.A = np.mat(np.zeros((fsize, Y.shape[1])))
    rp = regparam
    rpinv = 1. / rp
    # Bias: initialize the dual solution with only the constant sqrt(bias)
    # feature selected (a Sherman-Morrison update of rpinv * I).
    cv = np.sqrt(self.bias) * np.mat(np.ones((1, tsize)))
    ca = rpinv * (1. / (1. + cv * rpinv * cv.T)) * (cv * rpinv)
    self.dualvec = rpinv * Y - cv.T * rpinv * (
        1. / (1. + cv * rpinv * cv.T)) * (cv * rpinv * Y)
    XT = X.T
    GXT = rpinv * XT - cv.T * rpinv * (1. / (1. + cv * rpinv * cv.T)) * (
        (cv * rpinv) * XT)
    diagG = []
    for i in range(tsize):
        diagGi = rpinv - cv.T[i, 0] * ca[0, i]
        diagG.append(diagGi)
    diagG = np.mat(diagG).T
    listX = []
    for ci in range(fsize):
        listX.append(X[ci])
    self.selected = []
    currentfcount = 0
    self.performances = []
    while currentfcount < self.desiredfcount:
        if self.measure is not None:
            bestlooperf = None
        else:
            bestlooperf = float('inf')
        self.looperf = []
        for ci in range(fsize):
            if ci in self.selected:
                continue
            cv = listX[ci]
            GXT_ci = GXT[:, ci]
            ca = GXT_ci * (1. / (1. + cv * GXT_ci))
            updA = self.dualvec - ca * (cv * self.dualvec)
            invupddiagG = 1. / (diagG - np.multiply(ca, GXT_ci))
            if self.measure is not None:
                # Leave-one-out predictions for the candidate feature set.
                loopred = Y - np.multiply(invupddiagG, updA)
                looperf_i = self.measure(Y, loopred)
                if bestlooperf is None:
                    bestlooperf = looperf_i
                    bestcind = ci
                if looperf_i < bestlooperf:
                    bestcind = ci
                    bestlooperf = looperf_i
            else:
                # This default squared performance is a bit faster to
                # compute than one loaded separately.
                loodiff = np.multiply(invupddiagG, updA)
                looperf_i = np.mean(np.multiply(loodiff, loodiff))
                if looperf_i < bestlooperf:
                    bestcind = ci
                    bestlooperf = looperf_i
            self.looperf.append(looperf_i)
        self.looperf = np.mat(self.looperf)
        self.bestlooperf = bestlooperf
        self.performances.append(bestlooperf)
        # Permanently add the winning feature via rank-one updates.
        cv = listX[bestcind]
        GXT_bci = GXT[:, bestcind]
        ca = GXT_bci * (1. / (1. + cv * GXT_bci))
        self.dualvec = self.dualvec - ca * (cv * self.dualvec)
        diagG = diagG - np.multiply(ca, GXT_bci)
        GXT = GXT - ca * (cv * GXT)
        self.selected.append(bestcind)
        currentfcount += 1
        # Linear predictor with bias.
        self.A[self.selected] = X[self.selected] * self.dualvec
        self.b = bias_slice * self.dualvec * np.sqrt(self.bias)
        self.predictor = predictor.LinearPredictor(self.A, self.b)
        if self.callbackfun is not None:
            self.callbackfun.callback(self)
    if self.callbackfun is not None:
        self.callbackfun.finished(self)
    self.A[self.selected] = X[self.selected] * self.dualvec
    self.b = bias_slice * self.dualvec * np.sqrt(self.bias)
    self.results[SELECTED_FEATURES] = self.selected
    self.results[GREEDYRLS_LOO_PERFORMANCES] = self.performances
    self.predictor = predictor.LinearPredictor(self.A, self.b)
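# The ca / GXT / diagG updates above are Sherman-Morrison rank-one
# corrections: adding a feature row c to the selected set replaces
# G = (X_s^T X_s + rp * I)^-1 by G - G c (1 + c^T G c)^-1 c^T G.
# A small self-contained numeric check of that identity (hypothetical sizes):
import numpy as np

rng = np.random.RandomState(0)
n = 5
M = rng.rand(n, n) + n * np.eye(n)  # well-conditioned, invertible
G = np.linalg.inv(M)
c = rng.rand(n, 1)
updated = G - G.dot(c).dot(1. / (1. + c.T.dot(G).dot(c))).dot(c.T).dot(G)
assert np.allclose(updated, np.linalg.inv(M + c.dot(c.T)))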
def _solve_new(self, regparam, floattype=np.float64):
    # Legacy code: works only with a single output, but supports custom
    # performance measures and is faster than _solve_bu.
    if not self.Y.shape[1] == 1:
        raise Exception(
            'This variation of GreedyRLS supports only one output at a '
            'time. The output matrix is now of shape ' + str(self.Y.shape) + '.')
    self.regparam = regparam
    X = self.X
    Y = np.mat(self.Y, dtype=floattype)
    bias_slice = np.sqrt(self.bias) * np.mat(
        np.ones((1, X.shape[1]), dtype=floattype))
    tsize = self.size
    fsize = X.shape[0]
    assert X.shape[1] == tsize
    self.A = np.mat(np.zeros((fsize, 1), dtype=floattype))
    rp = regparam
    rpinv = 1. / rp
    # Bias feature initialization (Sherman-Morrison update of rpinv * I).
    cv = np.sqrt(self.bias) * np.mat(np.ones((1, tsize), dtype=floattype))
    ca = np.mat(rpinv * (1. / (1. + cv * rpinv * cv.T)) * (cv * rpinv),
                dtype=floattype)
    self.dualvec = rpinv * Y - cv.T * rpinv * (
        1. / (1. + cv * rpinv * cv.T)) * (cv * rpinv * Y)
    GXT = cv.T * np.mat(
        (rpinv * (1. / (1. + cv * rpinv * cv.T)) * (cv * rpinv)) * X.T,
        dtype=floattype)
    tempmatrix = np.mat(np.zeros(X.T.shape, dtype=floattype))
    np.multiply(X.T, rpinv, tempmatrix)  # tempmatrix = rpinv * X.T
    np.subtract(tempmatrix, GXT, GXT)
    diagG = []
    for i in range(tsize):
        diagGi = rpinv - cv.T[i, 0] * ca[0, i]
        diagG.append(diagGi)
    diagG = np.mat(diagG, dtype=floattype).T
    self.selected = []
    self.performances = []
    currentfcount = 0
    temp2 = np.mat(np.zeros(tempmatrix.shape, dtype=floattype))
    while currentfcount < self.desiredfcount:
        # Evaluate all candidate features at once with vectorized rank-one
        # leave-one-out updates; tempmatrix ends up holding the LOO
        # differences for every candidate.
        np.multiply(X.T, GXT, tempmatrix)
        XGXTdiag = np.sum(tempmatrix, axis=0)
        XGXTdiag = 1. / (1. + XGXTdiag)
        np.multiply(GXT, XGXTdiag, tempmatrix)
        tempvec1 = np.multiply((X * self.dualvec).T, XGXTdiag)
        np.multiply(GXT, tempvec1, temp2)
        np.subtract(self.dualvec, temp2, temp2)
        np.multiply(tempmatrix, GXT, tempmatrix)
        np.subtract(diagG, tempmatrix, tempmatrix)
        np.divide(1, tempmatrix, tempmatrix)
        np.multiply(tempmatrix, temp2, tempmatrix)
        if self.measure is not None:
            np.subtract(Y, tempmatrix, tempmatrix)
            np.multiply(temp2, 0, temp2)
            np.add(temp2, Y, temp2)
            looperf = self.measure.multiTaskPerformance(temp2, tempmatrix)
            looperf = np.mat(looperf, dtype=floattype)
            if self.measure.iserror:
                looperf[0, self.selected] = float('inf')
                bestcind = np.argmin(looperf)
                self.bestlooperf = np.amin(looperf)
            else:
                looperf[0, self.selected] = -float('inf')
                bestcind = np.argmax(looperf)
                self.bestlooperf = np.amax(looperf)
        else:
            np.multiply(tempmatrix, tempmatrix, temp2)
            looperf = np.sum(temp2, axis=0)
            looperf[0, self.selected] = float('inf')
            bestcind = np.argmin(looperf)
            self.bestlooperf = np.amin(looperf)
            self.loo_predictions = Y - tempmatrix[:, bestcind]
        self.looperf = looperf  # needed in the test_GreedyRLS module
        self.performances.append(self.bestlooperf)
        # Permanently add the winning feature via rank-one updates.
        cv = X[bestcind]
        GXT_bci = GXT[:, bestcind]
        ca = GXT_bci * (1. / (1. + cv * GXT_bci))
        self.dualvec = self.dualvec - ca * (cv * self.dualvec)
        diagG = diagG - np.multiply(ca, GXT_bci)
        np.multiply(tempmatrix, 0, tempmatrix)
        np.add(tempmatrix, ca, tempmatrix)
        tempvec1 = cv * GXT
        np.multiply(tempmatrix, tempvec1, tempmatrix)
        np.subtract(GXT, tempmatrix, GXT)
        self.selected.append(bestcind)
        currentfcount += 1
        # Linear predictor with bias.
        self.A[self.selected] = X[self.selected] * self.dualvec
        self.b = bias_slice * self.dualvec * np.sqrt(self.bias)
        self.predictor = predictor.LinearPredictor(self.A, self.b)
        if self.callbackfun is not None:
            self.callbackfun.callback(self)
    if self.callbackfun is not None:
        self.callbackfun.finished(self)
    self.A[self.selected] = X[self.selected] * self.dualvec
    self.b = bias_slice * self.dualvec * np.sqrt(self.bias)
    self.results[SELECTED_FEATURES] = self.selected
    self.results[GREEDYRLS_LOO_PERFORMANCES] = self.performances
    self.predictor = predictor.LinearPredictor(self.A, self.b)
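# The leave-one-out shortcut used throughout these solvers: for RLS the LOO
# residual at example i equals (y_i - yhat_i) / (1 - H_ii), where H is the
# hat matrix, so no refitting is needed. A brute-force verification on a tiny
# hypothetical ridge problem:
import numpy as np

rng = np.random.RandomState(1)
X = rng.rand(8, 3)
y = rng.rand(8)
lam = 1.0
H = X.dot(np.linalg.solve(X.T.dot(X) + lam * np.eye(3), X.T))
shortcut = (y - H.dot(y)) / (1. - np.diag(H))
for i in range(8):
    # Actually refit without example i and compare.
    mask = np.arange(8) != i
    w = np.linalg.solve(X[mask].T.dot(X[mask]) + lam * np.eye(3),
                        X[mask].T.dot(y[mask]))
    assert np.isclose(y[i] - X[i].dot(w), shortcut[i])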
def _solve_cython(self, regparam):
    self.regparam = regparam
    X = self.X
    Y = self.Y
    bias_slice = np.sqrt(self.bias) * np.mat(
        np.ones((1, X.shape[1]), dtype=np.float64))
    tsize = self.size
    fsize = X.shape[0]
    assert X.shape[1] == tsize
    self.A = np.mat(np.zeros((fsize, Y.shape[1])))
    rp = regparam
    rpinv = 1. / rp
    # Bias feature initialization.
    cv = np.sqrt(self.bias) * np.mat(np.ones((1, tsize)))
    ca = rpinv * (1. / (1. + cv * rpinv * cv.T)) * (cv * rpinv)
    self.dualvec = rpinv * Y - cv.T * rpinv * (
        1. / (1. + cv * rpinv * cv.T)) * (cv * rpinv * Y)
    XT = X.T
    GXT = rpinv * XT - cv.T * rpinv * (1. / (1. + cv * rpinv * cv.T)) * (
        (cv * rpinv) * XT)
    diagG = []
    for i in range(tsize):
        diagGi = rpinv - cv.T[i, 0] * ca[0, i]
        diagG.append(diagGi)
    diagG = np.array(diagG)
    listX = []
    for ci in range(fsize):
        listX.append(X[ci])
    self.selected = []
    currentfcount = 0
    self.performances = []
    selectedvec = np.zeros(fsize, dtype=np.int16)
    tempvec1 = np.zeros(tsize)
    tempvec2 = np.zeros(Y.shape[1])
    tempvec3 = np.zeros((tsize, Y.shape[1]))
    while currentfcount < self.desiredfcount:
        if self.measure is not None:
            self.bestlooperf = None
        else:
            self.bestlooperf = float('inf')
        self.looperf = np.ones(fsize) * float('inf')
        # The candidate search is implemented in compiled Cython code; a
        # pure-Python reference implementation is kept below.
        bestcind = _greedy_rls.find_optimal_feature(
            np.array(Y), np.array(X), np.array(GXT), diagG,
            np.array(self.dualvec), self.looperf, fsize, tsize,
            Y.shape[1], selectedvec, tempvec1, tempvec2, tempvec3)
        # Reference implementation:
        #
        # diagG = np.mat(diagG).T
        # for ci in range(fsize):
        #     if ci in self.selected:
        #         continue
        #     cv = listX[ci]
        #     GXT_ci = GXT[:, ci]
        #     ca = GXT_ci * (1. / (1. + cv * GXT_ci))
        #     updA = self.dualvec - ca * (cv * self.dualvec)
        #     invupddiagG = 1. / (diagG - np.multiply(ca, GXT_ci))
        #     if self.measure is not None:
        #         loopred = Y - np.multiply(invupddiagG, updA)
        #         looperf_i = self.measure.multiOutputPerformance(Y, loopred)
        #         if self.bestlooperf is None:
        #             self.bestlooperf = looperf_i
        #             bestcind = ci
        #         if self.measure.comparePerformances(looperf_i, self.bestlooperf) > 0:
        #             bestcind = ci
        #             self.bestlooperf = looperf_i
        #     else:
        #         # This default squared performance is a bit faster to
        #         # compute than one loaded separately.
        #         loodiff = np.multiply(invupddiagG, updA)
        #         looperf_i = np.mean(np.multiply(loodiff, loodiff))
        #         if looperf_i < self.bestlooperf:
        #             bestcind = ci
        #             self.bestlooperf = looperf_i
        #     self.looperf[ci] = looperf_i
        self.bestlooperf = self.looperf[bestcind]
        self.looperf = np.mat(self.looperf)
        self.performances.append(self.bestlooperf)
        cv = listX[bestcind]
        GXT_bci = GXT[:, bestcind]
        ca = GXT_bci * (1. / (1. + cv * GXT_bci))
        self.dualvec = self.dualvec - ca * (cv * self.dualvec)
        diagG = diagG - np.array(np.multiply(ca, GXT_bci)).reshape(self.size)
        GXT = GXT - ca * (cv * GXT)
        self.selected.append(bestcind)
        currentfcount += 1
        # Linear predictor with bias.
        self.A[self.selected] = X[self.selected] * self.dualvec
        self.b = bias_slice * self.dualvec * np.sqrt(self.bias)
        self.predictor = predictor.LinearPredictor(self.A, self.b)
        if self.callbackfun is not None:
            self.callbackfun.callback(self)
    if self.callbackfun is not None:
        self.callbackfun.finished(self)
    self.A[self.selected] = X[self.selected] * self.dualvec
    self.b = bias_slice * self.dualvec * np.sqrt(self.bias)
    self.results[SELECTED_FEATURES] = self.selected
    self.results[GREEDYRLS_LOO_PERFORMANCES] = self.performances
    self.predictor = predictor.LinearPredictor(self.A, self.b)
def getModel(self):
    return predictor.LinearPredictor(self.A, self.b)
def solve_weak(self, regparam):
    X = self.X
    Y = self.Y
    tsize = self.size
    fsize = X.shape[0]
    assert X.shape[1] == tsize
    self.A = np.mat(np.zeros((fsize, Y.shape[1])))
    rp = regparam
    rpinv = 1. / rp
    desiredfcount = self.desiredfcount
    if not fsize >= desiredfcount:
        raise Exception(
            'The overall number of features ' + str(fsize) +
            ' is smaller than the desired number ' + str(desiredfcount) +
            ' of features to be selected.')
    # Bias feature initialization.
    bias_slice = np.sqrt(self.bias) * np.mat(
        np.ones((1, X.shape[1]), dtype=np.float64))
    cv = np.sqrt(self.bias) * np.mat(np.ones((1, tsize)))
    ca = rpinv * (1. / (1. + cv * rpinv * cv.T)) * (cv * rpinv)
    self.dualvec = rpinv * Y - cv.T * rpinv * (
        1. / (1. + cv * rpinv * cv.T)) * (cv * rpinv * Y)
    diagG = []
    for i in range(tsize):
        diagGi = rpinv - cv.T[i, 0] * ca[0, i]
        diagG.append(diagGi)
    diagG = np.mat(diagG).T
    U, S, VT = la.svd(cv, full_matrices=False)
    U, S, VT = np.mat(U), np.mat(S), np.mat(VT)
    Omega = 1. / (S * S + rp) - rpinv
    self.selected = []
    notselected = set(range(fsize))
    currentfcount = 0
    self.performances = []
    while currentfcount < desiredfcount:
        if self.measure is not None:
            bestlooperf = None
        else:
            bestlooperf = float('inf')
        self.looperf = []
        # The "weak" variant evaluates only a random sample of 60
        # not-yet-selected candidate features per iteration.
        sample_60 = pyrandom.sample(sorted(notselected), 60)
        for ci in sample_60:
            cv = X[ci]
            GXT_ci = VT.T * np.multiply(Omega.T, (VT * cv.T)) + rpinv * cv.T
            ca = GXT_ci * (1. / (1. + cv * GXT_ci))
            updA = self.dualvec - ca * (cv * self.dualvec)
            invupddiagG = 1. / (diagG - np.multiply(ca, GXT_ci))
            if self.measure is not None:
                loopred = Y - np.multiply(invupddiagG, updA)
                looperf_i = self.measure.multiOutputPerformance(Y, loopred)
                if bestlooperf is None:
                    bestlooperf = looperf_i
                    bestcind = ci
                if self.measure.comparePerformances(looperf_i, bestlooperf) > 0:
                    bestcind = ci
                    bestlooperf = looperf_i
            else:
                # This default squared performance is a bit faster to
                # compute than one loaded separately.
                loodiff = np.multiply(invupddiagG, updA)
                looperf_i = np.mean(
                    np.sum(np.multiply(loodiff, loodiff), axis=0))
                if looperf_i < bestlooperf:
                    bestcind = ci
                    bestlooperf = looperf_i
            self.looperf.append(looperf_i)
        self.looperf = np.mat(self.looperf)
        self.bestlooperf = bestlooperf
        self.performances.append(bestlooperf)
        cv = X[bestcind]
        GXT_bci = VT.T * np.multiply(Omega.T, (VT * cv.T)) + rpinv * cv.T
        ca = GXT_bci * (1. / (1. + cv * GXT_bci))
        self.dualvec = self.dualvec - ca * (cv * self.dualvec)
        diagG = diagG - np.multiply(ca, GXT_bci)
        self.selected.append(bestcind)
        notselected.remove(bestcind)
        # Refresh the SVD of the selected features plus the bias row.
        X_sel = X[self.selected]
        if isinstance(X_sel, sp.base.spmatrix):
            X_sel = X_sel.todense()
        U, S, VT = la.svd(np.vstack([X_sel, bias_slice]), full_matrices=False)
        U, S, VT = np.mat(U), np.mat(S), np.mat(VT)
        Omega = 1. / (np.multiply(S, S) + rp) - rpinv
        currentfcount += 1
        # Linear predictor with bias.
        self.A[self.selected] = X[self.selected] * self.dualvec
        self.b = bias_slice * self.dualvec
        if self.callbackfun is not None:
            self.callbackfun.callback(self)
    if self.callbackfun is not None:
        self.callbackfun.finished(self)
    self.A[self.selected] = X[self.selected] * self.dualvec
    self.b = bias_slice * self.dualvec
    self.results['selected_features'] = self.selected
    self.results['GreedyRLS_LOO_performances'] = self.performances
    self.predictor = predictor.LinearPredictor(self.A, self.b)
def solve_tradeoff(self, regparam):
    """Trains RLS with the given value of the regularization parameter.

    @param regparam: value of the regularization parameter
    @type regparam: float
    """
    self.regparam = regparam
    X = self.X
    Y = self.Y
    if not hasattr(self, "bias"):
        self.bias = 0.
    tsize = self.size
    fsize = X.shape[0]
    assert X.shape[1] == tsize
    self.A = np.mat(np.zeros((fsize, Y.shape[1])))
    rp = regparam
    rpinv = 1. / rp
    if 'subsetsize' not in self.resource_pool:
        raise Exception("Parameter 'subsetsize' must be given.")
    desiredfcount = int(self.resource_pool['subsetsize'])
    if not fsize >= desiredfcount:
        raise Exception(
            'The overall number of features ' + str(fsize) +
            ' is smaller than the desired number ' + str(desiredfcount) +
            ' of features to be selected.')
    # Bias feature initialization.
    bias_slice = np.sqrt(self.bias) * np.mat(
        np.ones((1, X.shape[1]), dtype=np.float64))
    cv = bias_slice
    ca = rpinv * (1. / (1. + cv * rpinv * cv.T)) * (cv * rpinv)
    self.dualvec = rpinv * Y - cv.T * rpinv * (
        1. / (1. + cv * rpinv * cv.T)) * (cv * rpinv * Y)
    diagG = []
    for i in range(tsize):
        diagGi = rpinv - cv.T[i, 0] * ca[0, i]
        diagG.append(diagGi)
    diagG = np.mat(diagG).T
    U, S, VT = la.svd(cv, full_matrices=False)
    U, S, VT = np.mat(U), np.mat(S), np.mat(VT)
    Omega = 1. / (S * S + rp) - rpinv
    self.selected = []
    # Split the candidate features into blocks of 1000 to bound the memory
    # use of the vectorized leave-one-out computations.
    blocksize = 1000
    blocks = []
    blockcount = 0
    while True:
        startind = blockcount * blocksize
        if (blockcount + 1) * blocksize < fsize:
            endind = (blockcount + 1) * blocksize
            blocks.append(range(startind, endind))
            blockcount += 1
        else:
            blocks.append(range(startind, fsize))
            blockcount += 1
            break
    currentfcount = 0
    self.performances = []
    while currentfcount < desiredfcount:
        if self.measure is not None:
            self.bestlooperf = None
        else:
            self.bestlooperf = float('inf')
        looperf = np.mat(np.zeros((1, fsize)))
        for blockind in range(blockcount):
            block = blocks[blockind]
            tempmatrix = np.mat(np.zeros((tsize, len(block))))
            temp2 = np.mat(np.zeros((tsize, len(block))))
            X_block = X[block]
            GXT_block = VT.T * np.multiply(
                Omega.T, (VT * X_block.T)) + rpinv * X_block.T
            np.multiply(X_block.T, GXT_block, tempmatrix)
            XGXTdiag = np.sum(tempmatrix, axis=0)
            XGXTdiag = 1. / (1. + XGXTdiag)
            np.multiply(GXT_block, XGXTdiag, tempmatrix)
            tempvec1 = np.multiply((X_block * self.dualvec).T, XGXTdiag)
            np.multiply(GXT_block, tempvec1, temp2)
            np.subtract(self.dualvec, temp2, temp2)
            np.multiply(tempmatrix, GXT_block, tempmatrix)
            np.subtract(diagG, tempmatrix, tempmatrix)
            np.divide(1, tempmatrix, tempmatrix)
            np.multiply(tempmatrix, temp2, tempmatrix)
            if self.measure is not None:
                np.subtract(Y, tempmatrix, tempmatrix)
                np.multiply(temp2, 0, temp2)
                np.add(temp2, Y, temp2)
                looperf_block = self.measure.multiTaskPerformance(
                    temp2, tempmatrix)
                looperf_block = np.mat(looperf_block)
            else:
                np.multiply(tempmatrix, tempmatrix, tempmatrix)
                looperf_block = np.sum(tempmatrix, axis=0)
            looperf[:, block] = looperf_block
        if self.measure is not None:
            if self.measure.isErrorMeasure():
                looperf[0, self.selected] = float('inf')
                bestcind = np.argmin(looperf)
                self.bestlooperf = np.amin(looperf)
            else:
                looperf[0, self.selected] = -float('inf')
                bestcind = np.argmax(looperf)
                self.bestlooperf = np.amax(looperf)
        else:
            looperf[0, self.selected] = float('inf')
            bestcind = np.argmin(looperf)
            self.bestlooperf = np.amin(looperf)
        self.looperf = looperf
        self.performances.append(self.bestlooperf)
        cv = X[bestcind]
        GXT_bci = VT.T * np.multiply(Omega.T, (VT * cv.T)) + rpinv * cv.T
        ca = GXT_bci * (1. / (1. + cv * GXT_bci))
        self.dualvec = self.dualvec - ca * (cv * self.dualvec)
        diagG = diagG - np.multiply(ca, GXT_bci)
        self.selected.append(bestcind)
        # Refresh the SVD of the selected features plus the bias row.
        X_sel = X[self.selected]
        if isinstance(X_sel, sp.base.spmatrix):
            X_sel = X_sel.todense()
        U, S, VT = la.svd(np.vstack([X_sel, bias_slice]), full_matrices=False)
        U, S, VT = np.mat(U), np.mat(S), np.mat(VT)
        Omega = 1. / (np.multiply(S, S) + rp) - rpinv
        currentfcount += 1
        # Linear predictor with bias.
        self.A[self.selected] = X[self.selected] * self.dualvec
        self.b = bias_slice * self.dualvec
        self.callback()
    self.finished()
    self.A[self.selected] = X[self.selected] * self.dualvec
    self.b = bias_slice * self.dualvec
    self.results['selected_features'] = self.selected
    self.results['GreedyRLS_LOO_performances'] = self.performances
    self.predictor = predictor.LinearPredictor(self.A, self.b)
def __init__(self, X, Y, regparam=1.0, qids=None, callbackfun=None, **kwargs):
    self.regparam = regparam
    self.Y = array_tools.as_2d_array(Y)
    # Number of training examples.
    self.size = Y.shape[0]
    if self.Y.shape[1] > 1:
        raise Exception(
            'CGRankRLS does not currently work in multi-label mode')
    self.learn_from_labels = True
    self.callbackfun = callbackfun
    self.X = csc_matrix(X.T)
    if qids is not None:
        self.qids = map_qids(qids)
        self.splits = qids_to_splits(self.qids)
    else:
        self.qids = None
    regparam = self.regparam
    qids = self.qids
    if qids is not None:
        # Within-query centering operator: each column of P is the
        # normalized indicator vector of one query, so P P^T averages
        # within queries.
        P = sp.lil_matrix((self.size, len(set(qids))))
        for qidind in range(len(self.splits)):
            inds = self.splits[qidind]
            qsize = len(inds)
            for i in inds:
                P[i, qidind] = 1. / np.sqrt(qsize)
        P = P.tocsr()
        PT = P.tocsc().T
    else:
        P = 1. / np.sqrt(self.size) * (np.mat(
            np.ones((self.size, 1), dtype=np.float64)))
        PT = P.T
    X = self.X.tocsc()
    X_csr = X.tocsr()

    def mv(v):
        # (X L X^T + regparam * I) v, where L = I - P P^T is the
        # (block-)centering Laplacian of the ranking loss.
        v = np.mat(v).T
        return X_csr * (X.T * v) - X_csr * (P * (PT * (X.T * v))) + regparam * v

    G = LinearOperator((X.shape[0], X.shape[0]), matvec=mv, dtype=np.float64)
    Y = self.Y
    if self.callbackfun is not None:
        def cb(v):
            self.A = np.mat(v).T
            self.b = np.mat(np.zeros((1, 1)))
            self.callbackfun.callback(self)
    else:
        cb = None
    XLY = X_csr * Y - X_csr * (P * (PT * Y))
    try:
        self.A = np.mat(cg(G, XLY, callback=cb)[0]).T
    except Finished:
        # The callback may stop the optimization early.
        pass
    self.b = np.mat(np.zeros((1, 1)))
    self.predictor = predictor.LinearPredictor(self.A, self.b)
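# A usage sketch for the query-structured ranker above. Examples sharing a
# qid are compared only against each other. Hypothetical data; the import
# path assumes this __init__ belongs to rlscore.learner.CGRankRLS.
import numpy as np
from rlscore.learner import CGRankRLS

X = np.random.random((20, 5))
Y = np.random.random(20)
qids = [i // 5 for i in range(20)]  # four queries of five examples each
learner = CGRankRLS(X, Y, regparam=1.0, qids=qids)
scores = learner.predictor.predict(X)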
def createModel(self, svdlearner):
    A = svdlearner.A
    A = self.reducedSetTransformation(A)
    mod = predictor.LinearPredictor(A, 0.)
    return mod