Example #1
def min_wrapper(hyp, F, Flag, *varargin):
    # Utilize scipy.optimize functions to minimize the negative log marginal likelihood.  This is REALLY inefficient!
    x = convert_to_array(hyp) # Converts the hyperparameter class to an array

    if Flag == 'CG':
        aa = cg(nlml, x, dnlml, (F,hyp,varargin), maxiter=100, disp=True, full_output=True)
        x = aa[0]; fx = aa[1]; funcCalls = aa[2]; gradcalls = aa[3]
        if aa[4] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[4] == 2:
            print("Gradient and/or function calls not changing.")
        gvals = dnlml(x,F,hyp,varargin)
        return convert_to_class(x,hyp), fx, gvals, funcCalls

    elif Flag == 'BFGS':
        # Use BFGS
        aa = bfgs(nlml, x, dnlml, (F,hyp,varargin), maxiter=100, disp=False, full_output=True)
        x = aa[0]; fvals = aa[1]; gvals = aa[2]; Bopt = aa[3]; funcCalls = aa[4]; gradcalls = aa[5]
        if aa[6] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[6] == 2:
            print("Gradient and/or function calls not changing.")
        return convert_to_class(x,hyp), fvals, gvals, funcCalls

    elif Flag == 'SCG':
        # Use SCG
        aa = scg(x, nlml, dnlml, (F,hyp,varargin),niters=40)
        x = aa[0];
        fvals = aa[1]
        gvals = dnlml(x,F,hyp,varargin)
        return convert_to_class(x,hyp), fvals, gvals

    else:
        raise Exception('Incorrect usage of optimization flag in min_wrapper')
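This and the following min_wrapper examples call cg and bfgs without showing their imports. Judging by the tuple unpacking and the warning-flag handling, they look like aliases for the legacy scipy.optimize minimizers; a plausible setup (an assumption, not part of the original snippets) would be:

# Assumed module-level imports for these examples (not shown in the snippets):
from scipy.optimize import fmin_cg as cg, fmin_bfgs as bfgs

# With full_output=True these return, in order:
#   fmin_cg:   xopt, fopt, func_calls, grad_calls, warnflag
#   fmin_bfgs: xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag
# where warnflag 1 means the iteration limit was hit and 2 means the
# gradient and/or function values stopped changing.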
Example #2
def min_wrapper(hyp, F, Flag, *varargin):
    # Utilize scipy.optimize functions to minimize the negative log marginal likelihood.  This is REALLY inefficient!
    x = convert_to_array(hyp)  # Converts the hyperparameter class to an array

    if Flag == 'CG':
        aa = cg(nlml,
                x,
                dnlml, (F, hyp, varargin),
                maxiter=100,
                disp=True,
                full_output=True)
        x = aa[0]
        fx = aa[1]
        funcCalls = aa[2]
        gradcalls = aa[3]
        if aa[4] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[4] == 2:
            print("Gradient and/or function calls not changing.")
        gvals = dnlml(x, F, hyp, varargin)
        return convert_to_class(x, hyp), fx, gvals, funcCalls

    elif Flag == 'BFGS':
        # Use BFGS
        aa = bfgs(nlml,
                  x,
                  dnlml, (F, hyp, varargin),
                  maxiter=100,
                  disp=False,
                  full_output=True)
        x = aa[0]
        fvals = aa[1]
        gvals = aa[2]
        Bopt = aa[3]
        funcCalls = aa[4]
        gradcalls = aa[5]
        if aa[6] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[6] == 2:
            print("Gradient and/or function calls not changing.")
        return convert_to_class(x, hyp), fvals, gvals, funcCalls

    elif Flag == 'SCG':
        # Use SCG
        aa = scg(x, nlml, dnlml, (F, hyp, varargin), niters=40)
        x = aa[0]
        fvals = aa[1]
        gvals = dnlml(x, F, hyp, varargin)
        return convert_to_class(x, hyp), fvals, gvals

    else:
        raise Exception('Incorrect usage of optimization flag in min_wrapper')
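convert_to_array and convert_to_class are project helpers that are not shown here. As a rough illustration only, a GPML-style hyperparameter container with hypothetical mean/cov/lik fields could be flattened and restored along these lines:

import numpy as np

# Hypothetical sketch; the real helpers live in the surrounding GP library
# and may use different field names.
def convert_to_array(hyp):
    # Flatten the hyperparameter container into one 1-D array.
    return np.concatenate([np.ravel(hyp.mean), np.ravel(hyp.cov), np.ravel(hyp.lik)])

def convert_to_class(x, hyp):
    # Write the optimized values back into the container, preserving sizes.
    nm, nc = np.size(hyp.mean), np.size(hyp.cov)
    hyp.mean = x[:nm]
    hyp.cov = x[nm:nm + nc]
    hyp.lik = x[nm + nc:]
    return hyp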
Example #3
def min_wrapper(hyp, F, Flag, *varargin):
    # Utilize scipy.optimize functions, scg.py, or minimize.py to
    # minimize the negative log marginal likelihood.
    
    x = convert_to_array(hyp)   # convert the hyperparameter class to an array

    if Flag == 'CG':
        aa = cg(nlml, x, dnlml, (F,hyp,varargin), maxiter=100, disp=False, full_output=True)
        x = aa[0]; fopt = aa[1]; funcCalls = aa[2]; gradcalls = aa[3]
        if aa[4] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[4] == 2:
            print("Gradient and/or function calls not changing.")
        gopt = dnlml(x,F,hyp,varargin)
        return convert_to_class(x,hyp), fopt, gopt, funcCalls

    elif Flag == 'BFGS':
        # Use BFGS
        aa = bfgs(nlml, x, dnlml, (F,hyp,varargin), maxiter=100, disp=False, full_output=True)
        x = aa[0]; fopt = aa[1]; gopt = aa[2]; Bopt = aa[3]; funcCalls = aa[4]; gradcalls = aa[5]
        if aa[6] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[6] == 2:
            print("Gradient and/or function calls not changing.")
        if isinstance(fopt, ndarray):
            fopt = fopt[0]
        return convert_to_class(x,hyp), fopt, gopt, funcCalls

    elif Flag == 'SCG':
        # use scg.py
        aa   = scg(x, nlml, dnlml, (F,hyp,varargin), niters = 100)
        hyp  = convert_to_class(aa[0],hyp)
        fopt = aa[1][-1]
        gopt = dnlml(aa[0],F,hyp,varargin)
        return hyp, fopt, gopt, len(aa[1])

    elif Flag == 'Minimize':
        # use minimize.py
        aa   = run(x, nlml, dnlml, (F,hyp,varargin), maxnumfuneval=-100)
        hyp  = convert_to_class(aa[0],hyp)
        fopt = aa[1][-1]
        gopt = dnlml(aa[0],F,hyp,varargin)
        return hyp, fopt, gopt, len(aa[1])

    else:
        raise Exception('Incorrect usage of optimization flag in min_wrapper')
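In the SCG and Minimize branches, aa[1] appears to hold the trace of objective values across iterations, so fopt = aa[1][-1] is taken as the final negative log marginal likelihood and len(aa[1]) as the evaluation count. A short usage sketch, assuming hyp and F have been built by the surrounding GP code:

# hyp and F come from the surrounding GP setup; this only shows the call shape.
hyp_opt, fopt, gopt, n_evals = min_wrapper(hyp, F, 'SCG')
print('final negative log marginal likelihood: %.4f after %d evaluations'
      % (fopt, n_evals))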
Example #4
def gp_train(gp, X, y, R=None, w=None, Flag=None):
    ''' gp_train() returns the learnt hyperparameters.
    Following chapter 5.4.1 in Rasmussen and Williams: GPs for ML (2006).
    The original MATLAB implementation used the optimizer minimize.m,
    which is copyright (C) 1999 - 2006, Carl Edward Rasmussen.
    The Python versions used here come from scipy.optimize.

    Inputs R and w are needed for XGP regression! '''

    # Build the parameter list that we will optimize
    theta = np.concatenate((gp['meantheta'],gp['covtheta']))
    if Flag == 'CG':
        aa = cg(nlml, theta, dnlml, [gp,X,y,R,w], maxiter=100, disp=False, full_output=True)
        theta = aa[0]; fvals = aa[1]; funcCalls = aa[2]; gradcalls = aa[3]
        gvals = dnlml(theta, gp, X, y, R, w)
        if aa[4] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[4] == 2:
            print("Gradient and/or function calls not changing.")
        mt = len(gp['meantheta'])
        gp['meantheta'] = theta[:mt]
        gp['covtheta']  = theta[mt:]
        return gp, fvals, gvals, funcCalls
    elif Flag == 'BFGS':
        # Use BFGS
        #aa = bfgs(nlml, theta, dnlml, [gp,X,y,R,w], maxiter=100, disp=False, full_output=True)
        aa = bfgs(nlml, theta, dnlml, [gp,X,y,R,w], maxiter=100, disp=True, full_output=True)
        theta = aa[0]; fvals = aa[1]; gvals = aa[2]; Bopt = aa[3]; funcCalls = aa[4]; gradcalls = aa[5]
        if aa[6] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[6] == 2:
            print("Gradient and/or function calls not changing.")
        mt = len(gp['meantheta'])
        gp['meantheta'] = theta[:mt]
        gp['covtheta']  = theta[mt:]
        return gp, fvals, gvals, funcCalls
    elif Flag == 'SCG':
        theta, listF = scg.scg(theta, nlml, dnlml, [gp,X,y,R,w], niters = 100)
        mt = len(gp['meantheta'])
        gp['meantheta'] = theta[:mt]
        gp['covtheta']  = theta[mt:]
        return gp, listF 
    else:
        raise Exception("Need to specify a method for optimization in gp_train")
Example #5
    def trainBySCG(self,
                   X,
                   T,
                   nIterations=100,
                   verbose=False,
                   weightPrecision=0,
                   errorPrecision=0,
                   saveWeightsHistory=False):

        if self.Xmeans is None:
            self.Xmeans = X.mean(axis=0)
            self.Xstds = X.std(axis=0)
            self.Xconstant = self.Xstds == 0
            self.XstdsFixed = copy(self.Xstds)
            self.XstdsFixed[self.Xconstant] = 1
        X = self.standardizeX(X)

        if T.ndim == 1:
            T = T.reshape((-1, 1))

        if self.Tmeans is None:
            self.Tmeans = T.mean(axis=0)
            self.Tstds = T.std(axis=0)
            self.Tconstant = self.Tstds == 0
            self.TstdsFixed = copy(self.Tstds)
            self.TstdsFixed[self.Tconstant] = 1
        T = self.standardizeT(T)

        ## takes the flattened weight vector produced by the previous SCG step
        ## and returns the MSE of the network's forward pass for those weights
        def errorFunctionOfWts(w):
            self.getWtMatricesFromSCGWtVector(w)
            Zprev = X
            for i in range(len(self.hiddenLayersSpecList)):
                V = self.Vs[i]
                # invoke hyperbolic tangent function in each hidden layer
                Zprev = np.tanh(
                    Zprev @ V[1:, :] + V[0:1, :]
                )  # handling bias weight without adding column of 1's
            Y = Zprev @ self.W[1:, :] + self.W[0:1, :]
            return np.mean((T - Y)**2)

        ## takes the flattened weight vector, backpropagates the error, and
        ## returns the gradient of the MSE w.r.t. all weights as a flat vector
        def gradientOfErrorFunctionOfWts(w):
            ## get new weights from last run of SCG
            self.getWtMatricesFromSCGWtVector(w)
            Zprev = X
            Z = [Zprev]
            for i in range(len(self.hiddenLayersSpecList)):
                V = self.Vs[i]
                Zprev = np.tanh(Zprev @ V[1:, :] + V[0:1, :])
                Z.append(Zprev)
            Y = Zprev @ self.W[1:, :] + self.W[0:1, :]
            delta = -(T - Y) / (X.shape[0] * T.shape[1])
            dW = 2 * np.vstack((np.ones(
                (1, delta.shape[0])) @ delta, Z[-1].T @ delta))
            dVs = []
            delta = (1 - Z[-1]**2) * (delta @ self.W[1:, :].T)
            for Zi in range(len(self.hiddenLayersSpecList), 0, -1):
                Vi = Zi - 1  # because X is first element of Z
                dV = 2 * np.vstack((np.ones(
                    (1, delta.shape[0])) @ delta, Z[Zi - 1].T @ delta))
                dVs.insert(0, dV)
                delta = (delta @ self.Vs[Vi][1:, :].T) * (1 - Z[Zi - 1]**2)
            # return the gradient packed as a flat vector
            return self.getSCGWtVectorFromWtMatrices(dVs, dW)

        scgresult = scg.scg(self.getSCGWtVectorFromWtMatrices(self.Vs, self.W),
                            errorFunctionOfWts,
                            gradientOfErrorFunctionOfWts,
                            xPrecision=weightPrecision,
                            fPrecision=errorPrecision,
                            nIterations=nIterations,
                            verbose=verbose,
                            ftracep=True,
                            xtracep=saveWeightsHistory)

        self.getWtMatricesFromSCGWtVector(scgresult['x'])
        self.reason = scgresult['reason']
        self.errorTrace = np.sqrt(
            scgresult['ftrace'])  # * self.Tstds # to unstandardize the MSEs
        self.numberOfIterations = len(self.errorTrace)
        self.trained = True
        self.weightsHistory = scgresult[
            'xtrace'] if saveWeightsHistory else None
        return self
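Hand-derived backpropagation gradients such as gradientOfErrorFunctionOfWts are easy to get subtly wrong, so it is worth comparing them against a finite-difference estimate before handing them to SCG. A self-contained sketch of that check on a toy objective (not on the class above), using scipy.optimize.check_grad:

import numpy as np
from scipy.optimize import check_grad

# Toy stand-ins for errorFunctionOfWts / gradientOfErrorFunctionOfWts.
A = np.array([[3.0, 1.0],
              [1.0, 2.0]])

def error_of_w(w):
    return 0.5 * w @ A @ w

def grad_of_w(w):
    return A @ w

w0 = np.random.randn(2)
# check_grad returns the norm of the difference between the analytic gradient
# and a finite-difference approximation; it should be close to zero.
print('gradient check error:', check_grad(error_of_w, grad_of_w, w0))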
Example #6
 def train_BFGS(self, x, t, gtol = 1e-2, Nmax = 1000, constrained = False,
                              callback = None):
     """train network using the Broyden-Fletcher-Goldfarb-Shanno quasi-Newton method"""
     from scipy.optimize import fmin_bfgs, fmin_l_bfgs_b
     from scg import scg
     from datetime import datetime
     
     # objective function to be minimized, takes a weight vector and returns an error measure
     def f(w, x, t):
         #t0=datetime.now()
         weights = self.unpack_weights(w)
         y = self._forward(x, *weights)
         E = np.sum(self.En(y, t, *weights))
         self.E.append(E)
         # store current network output for internal use
         self._y = y
         #print 'eval of f:' + str((datetime.now()-t0))
         return E
     
     # gradient of f
     def df(w, x, t):
         #t0=datetime.now()
         weights = self.unpack_weights(w)
         y = self._forward(x, *weights)
         dEnw = self.pack_weights(*self.dEn(x, y, t, *weights))
         g = np.sum(dEnw, 0)
         #print 'eval of df:' + str((datetime.now()-t0))
         return g
         
     def iter_status(xk):
         self._t1 = datetime.now()-self._t0
         self._iteration_no = self._iteration_no + 1
         print('Iteration: ' + str(self._iteration_no))
         print('E = ' + str(self.E[-1]))
         print('execution time: ' + str(self._t1))
         self._t0 = datetime.now()
         
     if callback is None:
         callback = iter_status
     
     t0=datetime.now()
     x = self.check_inputs(x)
     if not isinstance(t, np.ndarray):
         t = np.array(t)
     
     x = self.prepare_inputs(x)
     
     w = self.pack_weights(*self.get_weights())
     if not constrained:
         self._iteration_no = 0
         self._t0 = datetime.now()
         w_new = fmin_bfgs(f, w, df, (x, t), gtol = gtol, maxiter = Nmax, callback = callback)
         #w_new = fmin_cg(f, w, df, (x, t), gtol = gtol, maxiter = Nmax)
     else:
         #[w_new, E_min, d] = fmin_l_bfgs_b(f, w, df, (x, t), bounds=((-100, 100),)*w.shape[0],
                                                                             #approx_grad=False, factr = 1e7, pgtol = gtol,
                                                                             #maxfun = Nmax)
         #print d['task']
         tmp = scg(w, f, df, x, t,
                   xPrecision=np.finfo(float).eps,
                   nIterations=Nmax,
                   fPrecision=np.finfo(float).eps)
         w_new = tmp['x']
         print(tmp['reason'])
     #w_new = leastsq(f, w, (x, t), df)
     self.set_weights(*self.unpack_weights(w_new))
     
     print('Training complete, took ' + str(datetime.now()-t0))
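The unconstrained branch above follows the usual fmin_bfgs pattern: an objective f(w, ...), its gradient df(w, ...), extra data passed through args, and a per-iteration callback. A self-contained illustration of that pattern on a toy least-squares problem:

import numpy as np
from scipy.optimize import fmin_bfgs

# Synthetic regression data.
X = np.random.randn(50, 3)
t = X @ np.array([1.0, -2.0, 0.5]) + 0.01 * np.random.randn(50)

def f(w, X, t):
    # sum-of-squares error, analogous to the network error above
    return 0.5 * np.sum((X @ w - t) ** 2)

def df(w, X, t):
    # gradient of the sum-of-squares error
    return X.T @ (X @ w - t)

def iter_status(wk):
    # called once per BFGS iteration with the current weights
    print('current error:', f(wk, X, t))

w_opt = fmin_bfgs(f, np.zeros(3), df, args=(X, t), gtol=1e-2,
                  maxiter=1000, callback=iter_status)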
Example #7
def min_wrapper(hyp, F, Flag, *varargin):
    # Utilize scipy.optimize functions, scg.py, or minimize.py to
    # minimize the negative log marginal likelihood.

    x = convert_to_array(hyp)  # convert the hyperparameter class to an array

    if Flag == 'CG':
        aa = cg(nlml,
                x,
                dnlml, (F, hyp, varargin),
                maxiter=100,
                disp=False,
                full_output=True)
        x = aa[0]
        fopt = aa[1]
        funcCalls = aa[2]
        gradcalls = aa[3]
        if aa[4] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[4] == 2:
            print("Gradient and/or function calls not changing.")
        gopt = dnlml(x, F, hyp, varargin)
        return convert_to_class(x, hyp), fopt, gopt, funcCalls

    elif Flag == 'BFGS':
        # Use BFGS
        aa = bfgs(nlml,
                  x,
                  dnlml, (F, hyp, varargin),
                  maxiter=100,
                  disp=False,
                  full_output=True)
        x = aa[0]
        fopt = aa[1]
        gopt = aa[2]
        Bopt = aa[3]
        funcCalls = aa[4]
        gradcalls = aa[5]
        if aa[6] == 1:
            print("Maximum number of iterations exceeded.")
        elif aa[6] == 2:
            print("Gradient and/or function calls not changing.")
        if isinstance(fopt, ndarray):
            fopt = fopt[0]
        return convert_to_class(x, hyp), fopt, gopt, funcCalls

    elif Flag == 'SCG':
        # use scg.py
        aa = scg(x, nlml, dnlml, (F, hyp, varargin), niters=100)
        hyp = convert_to_class(aa[0], hyp)
        fopt = aa[1][-1]
        gopt = dnlml(aa[0], F, hyp, varargin)
        return hyp, fopt, gopt, len(aa[1])

    elif Flag == 'Minimize':
        # use minimize.py
        aa = run(x, nlml, dnlml, (F, hyp, varargin), maxnumfuneval=-100)
        hyp = convert_to_class(aa[0], hyp)
        fopt = aa[1][-1]
        gopt = dnlml(aa[0], F, hyp, varargin)
        return hyp, fopt, gopt, len(aa[1])

    else:
        raise Exception('Incorrect usage of optimization flag in min_wrapper')
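This example repeats the structure of Example #3. One small observation: the warning-flag handling is duplicated across the CG and BFGS branches (and across examples); a shared helper along these lines (a hypothetical refactor, not part of the original code) would keep it in one place:

# Hypothetical helper, not in the original project: shared interpretation of
# the warnflag value returned by scipy's fmin_cg / fmin_bfgs.
def report_warnflag(warnflag):
    if warnflag == 1:
        print("Maximum number of iterations exceeded.")
    elif warnflag == 2:
        print("Gradient and/or function calls not changing.")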