# min_wrapper: drive scipy.optimize (or a local SCG routine) to minimize the
# negative log marginal likelihood. Assumes the enclosing module provides
#   from scipy.optimize import fmin_cg as cg, fmin_bfgs as bfgs
# plus the local helpers nlml, dnlml, scg, convert_to_array and convert_to_class.
def min_wrapper(hyp, F, Flag, *varargin):
    # Utilize scipy.optimize functions to minimize the negative log marginal
    # likelihood. This is REALLY inefficient!
    x = convert_to_array(hyp)   # convert the hyperparameter class to an array

    if Flag == 'CG':
        aa = cg(nlml, x, dnlml, (F, hyp, varargin), maxiter=100, disp=True, full_output=True)
        x = aa[0]; fx = aa[1]; funcCalls = aa[2]; gradcalls = aa[3]
        if aa[4] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[4] == 2:
            print("Gradient and/or function calls not changing.")
        gvals = dnlml(x, F, hyp, varargin)
        return convert_to_class(x, hyp), fx, gvals, funcCalls

    elif Flag == 'BFGS':
        # Use BFGS
        aa = bfgs(nlml, x, dnlml, (F, hyp, varargin), maxiter=100, disp=False, full_output=True)
        x = aa[0]; fvals = aa[1]; gvals = aa[2]; Bopt = aa[3]; funcCalls = aa[4]; gradcalls = aa[5]
        if aa[6] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[6] == 2:
            print("Gradient and/or function calls not changing.")
        return convert_to_class(x, hyp), fvals, gvals, funcCalls

    elif Flag == 'SCG':
        # Use SCG
        aa = scg(x, nlml, dnlml, (F, hyp, varargin), niters=40)
        x = aa[0]; fvals = aa[1]
        gvals = dnlml(x, F, hyp, varargin)
        return convert_to_class(x, hyp), fvals, gvals

    else:
        raise Exception('Incorrect usage of optimization flag in min_wrapper')
def min_wrapper(hyp, F, Flag, *varargin):
    # Utilize scipy.optimize functions, scg.py, or minimize.py to
    # minimize the negative log marginal likelihood.
    # Assumes: from scipy.optimize import fmin_cg as cg, fmin_bfgs as bfgs,
    # from numpy import ndarray, plus the local scg, run (minimize.py),
    # nlml, dnlml, convert_to_array and convert_to_class.
    x = convert_to_array(hyp)   # convert the hyperparameter class to an array

    if Flag == 'CG':
        aa = cg(nlml, x, dnlml, (F, hyp, varargin), maxiter=100, disp=False, full_output=True)
        x = aa[0]; fopt = aa[1]; funcCalls = aa[2]; gradcalls = aa[3]
        if aa[4] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[4] == 2:
            print("Gradient and/or function calls not changing.")
        gopt = dnlml(x, F, hyp, varargin)
        return convert_to_class(x, hyp), fopt, gopt, funcCalls

    elif Flag == 'BFGS':
        # Use BFGS
        aa = bfgs(nlml, x, dnlml, (F, hyp, varargin), maxiter=100, disp=False, full_output=True)
        x = aa[0]; fopt = aa[1]; gopt = aa[2]; Bopt = aa[3]; funcCalls = aa[4]; gradcalls = aa[5]
        if aa[6] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[6] == 2:
            print("Gradient and/or function calls not changing.")
        if isinstance(fopt, ndarray):
            fopt = fopt[0]
        return convert_to_class(x, hyp), fopt, gopt, funcCalls

    elif Flag == 'SCG':
        # Use scg.py
        aa = scg(x, nlml, dnlml, (F, hyp, varargin), niters=100)
        hyp = convert_to_class(aa[0], hyp)
        fopt = aa[1][-1]
        gopt = dnlml(aa[0], F, hyp, varargin)
        return hyp, fopt, gopt, len(aa[1])

    elif Flag == 'Minimize':
        # Use minimize.py
        aa = run(x, nlml, dnlml, (F, hyp, varargin), maxnumfuneval=-100)
        hyp = convert_to_class(aa[0], hyp)
        fopt = aa[1][-1]
        gopt = dnlml(aa[0], F, hyp, varargin)
        return hyp, fopt, gopt, len(aa[1])

    else:
        raise Exception('Incorrect usage of optimization flag in min_wrapper')
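# Hedged usage sketch for min_wrapper: only the call pattern follows the code
# above. `HyperParameters`, `gpr`, `meanfunc`, `covfunc`, `likfunc`, `x` and `y`
# are hypothetical stand-ins for whatever hyperparameter container and objective
# arguments the surrounding package actually uses, not its real API.
hyp0 = HyperParameters(mean=[0.0], cov=[0.0, 0.0], lik=[-2.3])   # hypothetical container
hyp_opt, fopt, gopt, n_evals = min_wrapper(hyp0, gpr, 'SCG',
                                           meanfunc, covfunc, likfunc, x, y)
print('negative log marginal likelihood at the optimum:', fopt)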
def gp_train(gp, X, y, R=None, w=None, Flag=None):
    '''
    gp_train() returns the learnt hyperparameters.
    Following chapter 5.4.1 in Rasmussen and Williams: GPs for ML (2006).
    The original (MATLAB) version of the optimizer minimize.m is
    copyright (C) 1999 - 2006 Carl Edward Rasmussen; the Python optimizers
    used here come from scipy.optimize plus a local scg module.
    Inputs R and w are needed for XGP regression!
    '''
    # Build the parameter list that we will optimize
    theta = np.concatenate((gp['meantheta'], gp['covtheta']))

    if Flag == 'CG':
        aa = cg(nlml, theta, dnlml, [gp, X, y, R, w], maxiter=100, disp=False, full_output=True)
        theta = aa[0]; fvals = aa[1]; funcCalls = aa[2]; gradcalls = aa[3]
        gvals = dnlml(theta, gp, X, y, R, w)
        if aa[4] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[4] == 2:
            print("Gradient and/or function calls not changing.")
        mt = len(gp['meantheta'])
        gp['meantheta'] = theta[:mt]
        gp['covtheta'] = theta[mt:]
        return gp, fvals, gvals, funcCalls

    elif Flag == 'BFGS':
        # Use BFGS
        aa = bfgs(nlml, theta, dnlml, [gp, X, y, R, w], maxiter=100, disp=True, full_output=True)
        theta = aa[0]; fvals = aa[1]; gvals = aa[2]; Bopt = aa[3]; funcCalls = aa[4]; gradcalls = aa[5]
        if aa[6] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[6] == 2:
            print("Gradient and/or function calls not changing.")
        mt = len(gp['meantheta'])
        gp['meantheta'] = theta[:mt]
        gp['covtheta'] = theta[mt:]
        return gp, fvals, gvals, funcCalls

    elif Flag == 'SCG':
        theta, listF = scg.scg(theta, nlml, dnlml, [gp, X, y, R, w], niters=100)
        mt = len(gp['meantheta'])
        gp['meantheta'] = theta[:mt]
        gp['covtheta'] = theta[mt:]
        return gp, listF

    else:
        raise Exception("Need to specify a method for optimization in gp_train")
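# Hedged usage sketch for gp_train: the 'meantheta'/'covtheta' keys come from
# the function above, but a real gp dict would also need whatever mean/cov
# specification nlml and dnlml expect; the toy data and initial values here
# are made up for illustration only.
import numpy as np

X = np.random.randn(20, 1)                        # 20 one-dimensional inputs
y = np.sin(X[:, 0]) + 0.1 * np.random.randn(20)   # noisy targets
gp = {'meantheta': np.zeros(1), 'covtheta': np.zeros(2)}   # initial log-hyperparameters
gp, fvals, gvals, funcCalls = gp_train(gp, X, y, Flag='BFGS')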
def trainBySCG(self, X, T, nIterations=100, verbose=False,
               weightPrecision=0, errorPrecision=0, saveWeightsHistory=False):
    # Assumes `import numpy as np`, `from copy import copy`, and a local scg
    # module providing scg.scg(...).
    # Standardize inputs on the first call, caching means and stds.
    if self.Xmeans is None:
        self.Xmeans = X.mean(axis=0)
        self.Xstds = X.std(axis=0)
        self.Xconstant = self.Xstds == 0
        self.XstdsFixed = copy(self.Xstds)
        self.XstdsFixed[self.Xconstant] = 1
    X = self.standardizeX(X)

    if T.ndim == 1:
        T = T.reshape((-1, 1))

    if self.Tmeans is None:
        self.Tmeans = T.mean(axis=0)
        self.Tstds = T.std(axis=0)
        self.Tconstant = self.Tstds == 0
        self.TstdsFixed = copy(self.Tstds)
        self.TstdsFixed[self.Tconstant] = 1
    T = self.standardizeT(T)

    def errorFunctionOfWts(w):
        # Unpack the flat weight vector into layer matrices, run a forward
        # pass, and return the mean squared error.
        self.getWtMatricesFromSCGWtVector(w)
        Zprev = X
        for i in range(len(self.hiddenLayersSpecList)):
            V = self.Vs[i]
            # hyperbolic tangent activation in each hidden layer;
            # the bias weights live in the first row of V, so no column of 1's is added
            Zprev = np.tanh(Zprev @ V[1:, :] + V[0:1, :])
        Y = Zprev @ self.W[1:, :] + self.W[0:1, :]
        return np.mean((T - Y)**2)

    def gradientOfErrorFunctionOfWts(w):
        # Unpack the flat weight vector, run a forward pass keeping each
        # layer's output, then back-propagate to get the error gradient.
        self.getWtMatricesFromSCGWtVector(w)
        Zprev = X
        Z = [Zprev]
        for i in range(len(self.hiddenLayersSpecList)):
            V = self.Vs[i]
            Zprev = np.tanh(Zprev @ V[1:, :] + V[0:1, :])
            Z.append(Zprev)
        Y = Zprev @ self.W[1:, :] + self.W[0:1, :]
        delta = -(T - Y) / (X.shape[0] * T.shape[1])
        dW = 2 * np.vstack((np.ones((1, delta.shape[0])) @ delta, Z[-1].T @ delta))
        dVs = []
        delta = (1 - Z[-1]**2) * (delta @ self.W[1:, :].T)
        for Zi in range(len(self.hiddenLayersSpecList), 0, -1):
            Vi = Zi - 1   # because X is the first element of Z
            dV = 2 * np.vstack((np.ones((1, delta.shape[0])) @ delta, Z[Zi - 1].T @ delta))
            dVs.insert(0, dV)
            delta = (delta @ self.Vs[Vi][1:, :].T) * (1 - Z[Zi - 1]**2)
        # Return the gradient matrices packed as a flat vector.
        return self.getSCGWtVectorFromWtMatrices(dVs, dW)

    scgresult = scg.scg(self.getSCGWtVectorFromWtMatrices(self.Vs, self.W),
                        errorFunctionOfWts,
                        gradientOfErrorFunctionOfWts,
                        xPrecision=weightPrecision,
                        fPrecision=errorPrecision,
                        nIterations=nIterations,
                        verbose=verbose,
                        ftracep=True,
                        xtracep=saveWeightsHistory)

    self.getWtMatricesFromSCGWtVector(scgresult['x'])
    self.reason = scgresult['reason']
    self.errorTrace = np.sqrt(scgresult['ftrace'])   # RMSE per iteration; multiply by self.Tstds to unstandardize
    self.numberOfIterations = len(self.errorTrace)
    self.trained = True
    self.weightsHistory = scgresult['xtrace'] if saveWeightsHistory else None
    return self
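# Hedged usage sketch for trainBySCG: `NeuralNetwork(1, [10, 10], 1)` is a
# hypothetical constructor for the class that owns the method above (one
# input, two tanh hidden layers of 10 units, one output); only the
# trainBySCG call itself follows the signature shown.
import numpy as np

X = np.linspace(-2.0, 2.0, 100).reshape(-1, 1)
T = np.sin(X) + 0.05 * np.random.randn(100, 1)
nnet = NeuralNetwork(1, [10, 10], 1)                # hypothetical constructor
nnet.trainBySCG(X, T, nIterations=200, verbose=False)
print(nnet.reason, nnet.numberOfIterations)         # why SCG stopped and after how many steps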
def train_BFGS(self, x, t, gtol=1e-2, Nmax=1000, constrained=False, callback=None):
    """Train the network with the Broyden-Fletcher-Goldfarb-Shanno (BFGS)
    quasi-Newton method; when `constrained` is True, fall back to the local
    scaled conjugate gradient routine in scg.py."""
    from scipy.optimize import fmin_bfgs, fmin_l_bfgs_b
    from scg import scg
    from datetime import datetime

    # Objective to be minimized: takes a flat weight vector and returns an error measure.
    def f(w, x, t):
        weights = self.unpack_weights(w)
        y = self._forward(x, *weights)
        E = np.sum(self.En(y, t, *weights))
        self.E.append(E)
        # store current network output for internal use
        self._y = y
        return E

    # Gradient of f with respect to the flat weight vector.
    def df(w, x, t):
        weights = self.unpack_weights(w)
        y = self._forward(x, *weights)
        dEnw = self.pack_weights(*self.dEn(x, y, t, *weights))
        g = np.sum(dEnw, 0)
        return g

    # Default callback: report iteration count, current error, and elapsed time.
    def iter_status(xk):
        self._t1 = datetime.now() - self._t0
        self._iteration_no += 1
        print('Iteration: ' + str(self._iteration_no))
        print('E = ' + str(self.E[-1]))
        print('execution time: ' + str(self._t1))
        self._t0 = datetime.now()

    if callback is None:
        callback = iter_status

    t0 = datetime.now()
    x = self.check_inputs(x)
    if not isinstance(t, np.ndarray):
        t = np.array(t)
    x = self.prepare_inputs(x)
    w = self.pack_weights(*self.get_weights())

    if not constrained:
        self._iteration_no = 0
        self._t0 = datetime.now()
        w_new = fmin_bfgs(f, w, df, (x, t), gtol=gtol, maxiter=Nmax, callback=callback)
        # w_new = fmin_cg(f, w, df, (x, t), gtol=gtol, maxiter=Nmax)
    else:
        # [w_new, E_min, d] = fmin_l_bfgs_b(f, w, df, (x, t),
        #                                   bounds=((-100, 100),) * w.shape[0],
        #                                   approx_grad=False, factr=1e7,
        #                                   pgtol=gtol, maxfun=Nmax)
        # print(d['task'])
        tmp = scg(w, f, df, x, t,
                  xPrecision=np.finfo(float).eps,
                  nIterations=Nmax,
                  fPrecision=np.finfo(float).eps)
        w_new = tmp['x']
        print(tmp['reason'])
        # w_new = leastsq(f, w, (x, t), df)

    self.set_weights(*self.unpack_weights(w_new))
    print('Training complete, took ' + str(datetime.now() - t0))
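# Hedged usage sketch for train_BFGS: `MLP(2, 5, 1)` is a hypothetical
# constructor for the network class that owns the method above; the data is
# synthetic. Only the train_BFGS signature (x, t, gtol, Nmax, constrained)
# comes from the code shown.
import numpy as np

x = np.random.randn(50, 2)
t = (x[:, 0] * x[:, 1]).reshape(-1, 1)
net = MLP(2, 5, 1)                                 # hypothetical constructor
net.train_BFGS(x, t, gtol=1e-3, Nmax=500)          # unconstrained: scipy fmin_bfgs path
net.train_BFGS(x, t, Nmax=500, constrained=True)   # constrained flag: local SCG path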