def train(self, alpha=0):
    """
    Define the gradient and hand it off to a scipy gradient-based optimizer.
    """
    # Set alpha so it can be referred to later if needed
    self.alpha = alpha

    # Define the derivative of the likelihood with respect to beta_k.
    # Need to multiply by -1 because we will be minimizing.
    # The following has a dimension of [1 x k] where k = |W|
    dl_by_dWk = lambda B, k: (k > 0) * self.alpha * B[k] - np.sum([
        self.y_train[i] * self.x_train[i, k] *
        sigmoid(-self.y_train[i] * np.dot(B, self.x_train[i, :]))
        for i in range(self.n)])

    # The full gradient is just an array of componentwise derivatives
    gradient = lambda B: np.array([dl_by_dWk(B, k)
                                   for k in range(self.x_train.shape[1])])

    # The function to be minimized.
    # Use the negative log likelihood for the objective function.
    objectiveFunction = lambda B: -self.likelihood(betas=B, alpha=self.alpha)

    # Optimize
    print('Optimizing for alpha = {}'.format(alpha))
    self.betas = fmin_bfgs(objectiveFunction, self.betas, fprime=gradient)
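# A minimal, self-contained sketch of the pattern the surrounding snippets share:
# an L2-regularized logistic-regression negative log-likelihood and its analytic
# gradient handed to scipy's fmin_bfgs. The names (neg_log_lik, grad, X, y) and
# the synthetic data are illustrative assumptions, not part of any snippet here.
import numpy as np
from scipy.optimize import fmin_bfgs

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.RandomState(0)
X = np.hstack([np.ones((100, 1)), rng.randn(100, 2)])   # column of ones = intercept
y = np.where(X[:, 1] + 0.5 * X[:, 2] + 0.1 * rng.randn(100) > 0, 1.0, -1.0)
alpha = 0.1                                              # L2 strength (intercept unpenalized)

def neg_log_lik(B):
    data_term = -np.sum(np.log(sigmoid(y * X.dot(B))))
    prior_term = 0.5 * alpha * np.sum(B[1:] ** 2)
    return data_term + prior_term

def grad(B):
    # d/dB_k of the negative log-likelihood, matching the dB_k lambdas above
    s = sigmoid(-y * X.dot(B))                           # shape (n,)
    g = -(X * (y * s)[:, None]).sum(axis=0)
    g[1:] += alpha * B[1:]
    return g

betas = fmin_bfgs(neg_log_lik, np.zeros(X.shape[1]), fprime=grad, disp=False)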
def train(self):
    '''
    Define the gradient and hand it off to a scipy gradient-based optimizer.
    '''
    # Define the derivative of the likelihood with respect to beta_k.
    # Need to multiply by -1 because we will be minimizing.
    def dB_k(B, k):
        return (k > 0) * self.alpha * B[k] - \
            np.sum([self.y_train[i] * self.x_train[i, k] *
                    sigmoid(-self.y_train[i] * np.dot(B, self.x_train[i, :]))
                    for i in range(self.n)])

    # The full gradient is just an array of componentwise derivatives
    dB = lambda B: np.array([dB_k(B, k)
                             for k in range(self.x_train.shape[1])])

    # Optimize
    self.betas = fmin_bfgs(self.negative_lik, self.betas, fprime=dB)
def train(self): """ Set gradient and let BFGS optimizer find min of neg log likelihood B - -log(likelihood) given betas """ # Set derivative of likelihood w.r.t. beta[k], -1 to minimize -log(likelihood) if self.d > 1: dB_k = lambda B, k : (k > -1) * self.alpha * B[k] - \ np.sum([self.y_train[i] * self.x_train[i,k] * \ sigmoid(-self.y_train[i] * \ np.dot(B, self.x_train[i,:])) for i in range(self.n)]) else: dB_k = lambda B, k : (k > -1) * self.alpha * B[k] - \ np.sum([self.y_train[i] * self.x_train[i,] * \ sigmoid(-self.y_train[i] * \ np.dot(B, self.x_train[i,])) for i in range(self.n)]) # The full gradient is just an array of componentwise derivatives dB = lambda B : np.array([dB_k(B, k) for k in range(self.d+1)]) # Optimize self.betas = fmin_bfgs(self.negative_like, self.betas, fprime=dB, disp=True) return self
def train(self): """ Define the gradient and hand it off to a scipy gradient-based optimizer. """ # Define the derivative of the likelihood with respect to beta_k. # Need to multiply by -1 because we will be minimizing. dB_k = lambda B, k : (k > 0) * self.alpha * B[k] - np.sum([ \ self.y_train[i] * self.x_train[i, k] * \ sigmoid(-self.y_train[i] *\ np.dot(B, self.x_train[i,:])) \ for i in range(self.n)]) # The full gradient is just an array of componentwise derivatives dB = lambda B : np.array([dB_k(B, k) \ for k in range(self.x_train.shape[1])]) # Optimize self.initialCost = self.negative_lik(self.betas) self.betas = fmin_bfgs(self.negative_lik, self.betas, fprime=dB, callback=self.onThetaIteration) # self.betas = myFminBFGS.fminLooped(self.negative_lik, self.betas, fprime=dB, callback= self.onThetaIteration) return "Trained"
def train(self): """ Set gradient and let BFGS optimizer find min of neg log likelihood B - -log(likelihood) given betas """ # Set derivative of likelihood w.r.t. beta[k], -1 to minimize -log(likelihood) if self.d > 1: dB_k = lambda B, k : (k > -1) * self.alpha * B[k] - \ np.sum([self.y_train[i] * self.x_train[i,k] * \ sigmoid(-self.y_train[i] * \ np.dot(B, self.x_train[i,:])) for i in range(self.n)]) else: dB_k = lambda B, k : (k > -1) * self.alpha * B[k] - \ np.sum([self.y_train[i] * self.x_train[i,] * \ sigmoid(-self.y_train[i] * \ np.dot(B, self.x_train[i,])) for i in range(self.n)]) # The full gradient is just an array of componentwise derivatives dB = lambda B: np.array([dB_k(B, k) for k in range(self.d + 1)]) # Optimize self.betas = fmin_bfgs(self.negative_like, self.betas, fprime=dB, disp=True) return self
def train(self):
    for cls in self.classes:
        print "Training on", cls
        x, y = self.get_classified_data(cls)
        z0 = self.get_transformed_data(x[0], self.polynomial_transform_order)
        w = np.random.random_sample(len(z0))
        w = fmin_bfgs(self.make_error(x, y), w,
                      fprime=self.make_error_gradient(x, y))
        self.models[cls] = w
def train(self):
    self.featurize_all()
    self.betas = np.random.randn(2 * 2 * self.dim + 2)
    self.betas = fmin_bfgs(self.neg_log_likelihood, self.betas,
                           fprime=self.neg_log_gradient)
    print self.betas
    return
def learning_parameters(i, y):
    def f(theta):
        return costFunction(theta, i, y)

    def fprime(theta):
        return gradFunction(theta, i, y)

    theta = zeros(3)
    return fmin_bfgs(f, theta, fprime, disp=True, maxiter=400)
def alpha(i, y):
    def f(theta):
        return lossFunction(theta, i, y)

    def fprime(theta):
        return gradient(theta, i, y)

    theta = zeros(3)
    return fmin_bfgs(f, theta, fprime, disp=True, maxiter=400)
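# When wrapping a cost/gradient pair in closures like the two functions above,
# it is easy to get the analytic gradient subtly wrong. A hedged sketch of a
# sanity check with scipy.optimize.check_grad before handing fprime to
# fmin_bfgs; the quadratic cost and its gradient here are made-up stand-ins.
import numpy as np
from scipy.optimize import check_grad, fmin_bfgs

TARGET = np.array([1.0, -2.0, 0.5])

def cost(theta):
    return np.sum((theta - TARGET) ** 2)

def grad(theta):
    return 2.0 * (theta - TARGET)

theta0 = np.zeros(3)
# check_grad returns the norm of the difference between the analytic gradient
# and a finite-difference estimate; it should be close to zero.
print(check_grad(cost, grad, theta0))
theta_opt = fmin_bfgs(cost, theta0, fprime=grad, disp=False)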
def logisticRegression(y, x, alpha=.1):
    """A simple logistic regression model with L2 regularization (zero-mean
    Gaussian priors on parameters)."""
    n = y.shape[0]
    betas = np.zeros(x.shape[1])

    # Define the gradient and hand it off to a scipy gradient-based optimizer.
    # Define the derivative of the likelihood with respect to beta_k.
    # Need to multiply by -1 because we will be minimizing.
    def dB_k(B, k):
        return (k > 0) * alpha * B[k] - np.sum([
            y[i] * x[i, k] * sigmoid(-y[i] * np.dot(B, x[i, :]))
            for i in range(n)])

    # The full gradient is just an array of componentwise derivatives
    def dB(B):
        return np.array([dB_k(B, k) for k in range(x.shape[1])])

    def neg_lik(betas):
        """Negative likelihood of the data under the current settings of
        parameters."""
        # Data likelihood
        l = 0
        for i in range(n):
            l += np.log(sigmoid(y[i] * np.dot(betas, x[i, :])))
        # Prior likelihood
        for k in range(1, x.shape[1]):
            l -= (alpha / 2.0) * betas[k]**2
        return -1.0 * l

    # Optimize
    betas = fmin_bfgs(neg_lik, betas, fprime=dB)

    # predict the y's again; not sure why sigmoid needs a transformation...
    py = np.zeros(n)
    for i in range(n):
        py[i] = (sigmoid(np.dot(betas, x[i, :])) - .5) * 2

    # f-score
    precision = sum([round(y1) == round(y2) for y1, y2 in zip(y, py) if y2 > 0]) \
        / float(sum([y2 > 0 for y2 in py]))
    recall = sum([round(y1) == round(y2) for y1, y2 in zip(y, py) if y1 > 0]) \
        / float(sum([y1 > 0 for y1 in y]))
    f1 = 2 * (precision * recall) / (precision + recall)
    print(precision, recall)
    return betas, f1
def trainEpochs(self, N):
    """Train the associated module for N epochs."""
    assert len(self.ds) > 0, "Dataset cannot be empty."
    self.module.resetDerivatives()

    def updateStatus(params):
        test_error = self.ds_val.evaluateModuleMSE(self.module)
        if self.epoch > 0 and test_error <= amin(self.test_errors):
            self.optimal_params = self.module.params.copy()
            self.optimal_epoch = self.epoch
        print "Epoch %i, E = %g, avg weight: %g" % \
            (self.epoch, (self._last_err / self.ds.getLength()),
             mean(absolute(self.module.params)))
        print "Test set error: " + str(test_error)
        self.train_errors.append(self._last_err / self.ds.getLength())
        self.test_errors.append(test_error)
        self.epoch += 1

    def f(params):
        self.module._setParameters(params)
        error = 0
        for seq in self.ds._provideSequences():
            self.module.reset()
            for sample in seq:
                self.module.activate(sample[0])
            for offset, sample in reversed(list(enumerate(seq))):
                target = sample[1]
                outerr = target - self.module.outputbuffer[offset]
                error += 0.5 * sum(outerr ** 2)
        self._last_err = error
        return error

    def df(params):
        self.module._setParameters(params)
        self.module.resetDerivatives()
        for seq in self.ds._provideSequences():
            self.module.reset()
            for sample in seq:
                self.module.activate(sample[0])
            for offset, sample in reversed(list(enumerate(seq))):
                target = sample[1]
                outerr = target - self.module.outputbuffer[offset]
                self.module.backActivate(outerr)
        # self.module.derivs contains the _negative_ gradient
        return -1 * self.module.derivs

    new_params = fmin_bfgs(f, self.module.params, df, maxiter=N,
                           callback=updateStatus, disp=0)
    # self.module._setParameters(new_params)
    self.epoch += 1
    self.totalepochs += 1
    return self._last_err
def logisticRegression(y, x, alpha=.1):
    """A simple logistic regression model with L2 regularization (zero-mean
    Gaussian priors on parameters)."""
    n = y.shape[0]
    betas = np.zeros(x.shape[1])

    # Define the gradient and hand it off to a scipy gradient-based optimizer.
    # Define the derivative of the likelihood with respect to beta_k.
    # Need to multiply by -1 because we will be minimizing.
    def dB_k(B, k):
        return (k > 0) * alpha * B[k] - np.sum([
            y[i] * x[i, k] * sigmoid(-y[i] * np.dot(B, x[i, :]))
            for i in xrange(n)])

    # The full gradient is just an array of componentwise derivatives
    def dB(B):
        return np.array([dB_k(B, k) for k in range(x.shape[1])])

    def neg_lik(betas):
        """Negative likelihood of the data under the current settings of
        parameters."""
        # Data likelihood
        l = 0
        for i in xrange(n):
            l += np.log(sigmoid(y[i] * np.dot(betas, x[i, :])))
        # Prior likelihood
        for k in xrange(1, x.shape[1]):
            l -= (alpha / 2.0) * betas[k]**2
        return -1.0 * l

    # Optimize
    betas = fmin_bfgs(neg_lik, betas, fprime=dB)

    # predict the y's again; not sure why sigmoid needs a transformation...
    py = np.zeros(n)
    for i in xrange(n):
        py[i] = (sigmoid(np.dot(betas, x[i, :])) - .5) * 2

    # f-score
    precision = sum([round(y1) == round(y2) for y1, y2 in zip(y, py) if y2 > 0]) \
        / float(sum([y2 > 0 for y2 in py]))
    recall = sum([round(y1) == round(y2) for y1, y2 in zip(y, py) if y1 > 0]) \
        / float(sum([y1 > 0 for y1 in y]))
    f1 = 2 * (precision * recall) / (precision + recall)
    print(precision, recall)
    return betas, f1
def logreg_opt(data, targets, start=None, maxiter=100000):
    """Logistic regression using second order optimization methods."""
    n, d = data.shape
    n, c = targets.shape
    data = c_[data, ones(len(data))]
    A = start
    if A is None:
        A = 0.01 * randn(c, d + 1)

    def f(x):
        return logloss(data, targets, x, verbose=1)

    def fprime(x):
        return dlogloss(data, targets, x)

    # fmin_bfgs works on a flat parameter vector, so the (c, d+1) weight
    # matrix is ravelled before optimization and reshaped afterwards.
    result = fmin_bfgs(f, A.ravel(), fprime=fprime, maxiter=maxiter)
    result.shape = (c, d + 1)
    return result
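# The ravel-then-reshape pattern above is the usual way to optimize a
# matrix-valued parameter with fmin_bfgs, which only accepts 1-D vectors.
# A minimal sketch under assumed names: the quadratic objective and the
# target matrix W_true are invented for illustration.
import numpy as np
from scipy.optimize import fmin_bfgs

c, d = 3, 4
W_true = np.arange(c * (d + 1), dtype=float).reshape(c, d + 1)

def loss(w_flat):
    # Reshape the flat vector back into matrix form inside the objective.
    W = w_flat.reshape(c, d + 1)
    return np.sum((W - W_true) ** 2)

w0 = np.zeros(c * (d + 1))
w_opt = fmin_bfgs(loss, w0, disp=False)
W_opt = w_opt.reshape(c, d + 1)   # recover the matrix shape, as logreg_opt does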
def train(self):
    dBk = lambda B, k: (k > 0) * self.alpha * B[k] - numpy.sum([
        self.trainingLabels[i] * self.trainingVectors[i, k] *
        self.sigmoid(-self.trainingLabels[i] *
                     numpy.dot(B, self.trainingVectors[i, :]))
        for i in range(self.n)])
    dB = lambda B: numpy.array(
        [dBk(B, j) for j in range(self.trainingVectors.shape[1])])
    # Pass the objective function itself, not its value at self.betas.
    self.betas = fmin_bfgs(self.negativeLikelihood, self.betas, fprime=dB)
def main():
    offsets = np.array([-0.66171298, -0.76921925, -0.70283083, -0.91615231])
    with open("uwb.pkl", "r") as f:
        data = pickle.load(f)
    arr = np.array(data)
    pos_opt = np.array([0., 0., 0.])
    pos_opt1 = np.array([0., 0., 0.])
    pos = np.array([0., 0.])
    pts_opt = []
    pts_opt1 = []
    pts = []
    arr = arr[100:]
    for row in arr:
        pos_opt1 = optimize.fmin_powell(f_opt, pos_opt1, args=(row[0:4] + offsets, ),
                                        xtol=0.0001, ftol=0.0001, disp=0)
        pos_opt = optimize.fmin_bfgs(f_opt, pos_opt, args=(row[0:4] + offsets, ), disp=0)
        pos = getPos(ac[0], ac[1], ac[2], row[0], row[1], row[2])
        pts_opt1.append(pos_opt1)
        pts_opt.append(pos_opt)
        pts.append(pos)
        # print(np.linalg.norm(pos - row[4:7]))
    arr = np.array(arr)
    pts = np.array(pts)
    pts_opt = np.array(pts_opt)
    pts_opt1 = np.array(pts_opt1)
    plt.figure()
    plt.scatter(pts_opt[:, 0], pts_opt[:, 2], c="g")
    # plt.scatter(pts_opt1[:, 0], pts_opt1[:, 2], c="k")
    plt.scatter(pts[:, 0], pts[:, 1], c="b")
    plt.scatter(arr[:, 4], arr[:, 6], c="r")
    plt.show()
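# f_opt is defined elsewhere in the original script above. A hedged guess at
# its shape for a UWB multilateration problem: squared error between measured
# ranges and the distances from a candidate position to known anchor positions.
# The anchor coordinates and range values here are invented placeholders.
import numpy as np
from scipy import optimize

ANCHORS = np.array([[0.0, 0.0, 0.0],
                    [5.0, 0.0, 0.0],
                    [0.0, 5.0, 0.0],
                    [0.0, 0.0, 3.0]])

def f_opt(pos, ranges):
    # sum of squared residuals between measured and geometric ranges
    dists = np.linalg.norm(ANCHORS - pos, axis=1)
    return np.sum((dists - ranges) ** 2)

measured = np.array([3.2, 4.1, 4.0, 3.5])      # example range measurements
pos_hat = optimize.fmin_bfgs(f_opt, np.zeros(3), args=(measured,), disp=0)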
def train(self): """ Define the gradient and hand it off to a scipy gradient-based optimizer. """ # Define the derivative of the likelihood with respect to beta_k. # Need to multiply by -1 because we will be minimizing. dB_k = lambda B, k : (k > 0) * self.alpha * B[k] - np.sum([self.y_train[i] * self.x_train[i, k] * sigmoid(-self.y_train[i] * np.dot(B, self.x_train[i,:])) for i in range(self.n)]) # The full gradient is just an array of componentwise derivatives dB = lambda B : np.array([dB_k(B, k) for k in range(self.x_train.shape[1])]) # Optimize self.betas = fmin_bfgs(self.negative_lik, self.betas, fprime=dB)
def train(self): """ Define the gradient and hand it off to a scipy gradient-based optimizer. """ # Define the derivative of the likelihood with respect to beta_k. # Need to multiply by -1 because we will be minimizing. dB_k = lambda B, k: (k > 0) * self.alpha * B[k] - np.sum([ self.y_train[i] * self.x_train[i, k] * sigmoid(-self.y_train[ i] * np.dot(B, self.x_train[i, :])) for i in range(self.n) ]) # The full gradient is just an array of componentwise derivatives dB = lambda B: np.array( [dB_k(B, k) for k in range(self.x_train.shape[1])]) # Optimize self.betas = fmin_bfgs(self.negative_lik, self.betas, fprime=dB)
def train(self): print "Initial Likelihood : ", self.initialCost import dill #nns = dill.source.getsource(self.NN) #print nns, "************" #print dill.dump(self.NN , open('/Users/rohanraja/Dropbox/Distributed Computing Startup/persist/Test.dat', 'w')) nnCost = lambda W : self.NN.costFn(self.NN.deLinearize(W), self.x_train, self.y_train) nnCostPrime = lambda W : self.NN.costFnPrime(self.NN.deLinearize(W), self.x_train, self.y_train) self.betas = fmin_bfgs(self.nnCost, self.betas, fprime=self.nnCostPrime ,callback= self.onThetaIteration) #dill.dumps(nnnCost) #self.betas = myFminBFGS.fminLooped(nnCost, self.betas, fprime=nnCostPrime ,callback=self.onThetaIteration) #self.betas = myFminBFGS.fminLooped(self.nnCost, self.betas, fprime=self.nnCostPrime ,callback= self.onThetaIteration) return "Trained"
def train(self, X, Y):
    """
    Define the gradient and hand it off to a scipy gradient-based optimizer.
    """
    # normalize all Y data to be between -1 and 1
    low = min(Y)
    high = max(Y)
    # there was no data to predict on, so just degenerate to predicting True all the time
    self.degenerate = False
    if high == low:
        self.degenerate = high
        return
    self.m = 2.0 / (high - low)
    self.b = (2.0 * low / (low - high)) - 1
    # constants for unscaling the output
    self.z = high - low
    self.w = low
    self.X = np.array(X)
    self.Y = self.m * np.array(Y) + self.b
    self.n = len(X)
    self.betas = np.zeros(len(X[0]))

    # Define the derivative of the likelihood with respect to beta_k.
    # Need to multiply by -1 because we will be minimizing.
    dB_k = lambda B, k: (k > 0) * self.alpha * B[k] - np.sum([
        self.Y[i] * self.X[i, k] *
        sigmoid(-self.Y[i] * np.dot(B, self.X[i, :]))
        for i in range(self.n)])

    # The full gradient is just an array of componentwise derivatives
    dB = lambda B: np.array([dB_k(B, k) for k in range(self.X.shape[1])])

    # Optimize
    self.betas = fmin_bfgs(self.lik, self.betas, fprime=dB, disp=False)
def train(self):
    # import ipdb; ipdb.set_trace()
    print("Initial Likelihood : ", self.initialCost)
    # nns = dill.source.getsource(self.NN)
    # print nns, "************"
    # print dill.dump(self.NN, open('/Users/rohanraja/Dropbox/Distributed Computing Startup/persist/Test.dat', 'w'))

    nnCost = lambda W: self.NN.costFn(self.NN.deLinearize(W),
                                      self.x_train, self.y_train)
    nnCostPrime = lambda W: self.NN.costFnPrime(self.NN.deLinearize(W),
                                                self.x_train, self.y_train)

    # self.betas = minimize(
    #     self.nnCost,
    #     self.betas,
    #     # method = 'BFGS',
    #     method=self.optiMethod,
    #     jac=self.nnCostPrime,
    #     options={'disp': True},
    #     callback=self.onThetaIteration
    # )

    self.betas = fmin_bfgs(self.nnCost, self.betas, fprime=self.nnCostPrime,
                           callback=self.onThetaIteration)
    # self.betas = myFminBFGS.fminLooped(self.nnCost, self.betas, fprime=self.nnCostPrime,
    #                                    callback=self.onThetaIteration)
    # dill.dumps(nnnCost)
    # self.betas = myFminBFGS.fminLooped(nnCost, self.betas, fprime=nnCostPrime,
    #                                    callback=self.onThetaIteration)
    # self.betas = myFminBFGS.fminLooped(self.nnCost, self.betas, fprime=self.nnCostPrime,
    #                                    callback=self.onThetaIteration)
    return "Trained"
def train(self, data, alpha=0):
    """
    Define the gradient and hand it off to a scipy gradient-based optimizer.
    """
    # Set alpha so it can be referred to later if needed
    self.alpha = alpha

    # Define the derivative of the likelihood with respect to beta_k.
    # Need to multiply by -1 because we will be minimizing.
    dB_k = lambda B, k: (k > 0) * self.alpha * B[k] - np.sum([
        data.y_train[i] * data.x_train[i, k] *
        sigmoid(-data.y_train[i] * np.dot(B, data.x_train[i, :]))
        for i in range(data.n)])

    # The full gradient is just an array of componentwise derivatives
    dB = lambda B: np.array([dB_k(B, k) for k in range(data.x_train.shape[1])])

    # The function to be minimized
    func = lambda B: -data.likelihood(betas=B, alpha=self.alpha)

    # Optimize
    self.betas = fmin_bfgs(func, self.betas, fprime=dB)
def optHyper(gpr, logtheta, Ifilter=None, priors=None, maxiter=100, gradcheck=False):
    """optimize hyperparameters of gp gpr starting from gpr
    optHyper(gpr,logtheta,filter=None,prior=None)
    gpr      : GP regression class
    logtheta : starting point for optimization
    Ifilter  : filter index vector
    prior    : non-default prior, otherwise assume first index amplitude,
               last noise, rest: lengthscales
    """
    if priors is None:
        # use a very crude default prior if we don't get anything else:
        priors = defaultPriors(gpr, logtheta)

    def fixlogtheta(logtheta, limit=1E3):
        """make a valid logtheta which is non-infinite and non-0"""
        rv = logtheta.copy()
        I_upper = logtheta > limit
        I_lower = logtheta < -limit
        rv[I_upper] = +limit
        rv[I_lower] = -limit
        return rv

    def checklogtheta(logtheta, limit=1E3):
        """make a valid logtheta which is non-infinite and non-0"""
        I_upper = logtheta > limit
        I_lower = logtheta < -limit
        return not (I_upper.any() or I_lower.any())

    # TODO: mean-function
    def f(logtheta):
        # logtheta_ = fixlogtheta(logtheta)
        logtheta_ = logtheta
        if not checklogtheta(logtheta):
            print logtheta
            # make optimizer/sampler search somewhere else
            return 1E6
        rv = gpr.lMl(logtheta_, lml=True, dlml=False, priors=priors)
        LG.debug("L(" + str(logtheta_) + ")==" + str(rv))
        if isnan(rv):
            return 1E6
        return rv

    def df(logtheta):
        # logtheta_ = fixlogtheta(logtheta)
        logtheta_ = logtheta
        if not checklogtheta(logtheta):
            # make optimizer/sampler search somewhere else
            print logtheta
            return zeros_like(logtheta_)
        rv = gpr.lMl(logtheta_, lml=False, dlml=True, priors=priors)
        LG.debug("dL(" + str(logtheta_) + ")==" + str(rv))
        # mask out filtered dimensions
        if not Ifilter is None:
            rv = rv * Ifilter
        if isnan(rv).any():
            In = isnan(rv)
            rv[In] = 1E6
        return rv

    plotit = False
    if plotit:
        X = arange(0.001, 0.05, 0.001)
        Y = zeros(size(X))
        dY = zeros(size(X))
        k = 2
        theta = logtheta
        for i in range(len(X)):
            theta[k] = log(X[i])
            Y[i] = f(theta)
            dY[i] = df(theta)[k]
        plot(X, Y)
        hold(True)
        plot(X, dY)
        show()

    # start-parameters
    theta0 = logtheta
    LG.info("startparameters for opt:" + str(exp(logtheta)))
    if gradcheck:
        LG.info("check_grad:" + str(OPT.check_grad(f, df, theta0)))
        raw_input()
    LG.info("start optimization")
    # opt_params = OPT.fmin_cg(f, theta0, fprime=df, args=(), gtol=1e-05,
    #                          maxiter=maxiter, full_output=1, disp=1, retall=0)
    # opt_params = OPT.fmin_ncg(f, theta0, fprime=df, fhess_p=None, fhess=None, args=(),
    #                           avextol=1e-04, epsilon=1.4901161193847656e-08,
    #                           maxiter=maxiter, full_output=1, disp=1, retall=0)
    opt_params = OPT.fmin_bfgs(f, theta0, fprime=df, args=(), gtol=1e-04, norm=inf,
                               epsilon=1.4901161193847656e-08, maxiter=maxiter,
                               full_output=1, disp=0, retall=0)
    rv = opt_params[0]
    LG.info("old parameters:")
    LG.info(str(exp(logtheta)))
    LG.info("optimized parameters:")
    LG.info(str(exp(rv)))
    LG.info("grad:" + str(df(rv)))
    return rv
def optimise_bfgs(self, start):
    print
    print "***** BFGS OPTIMISATION *****"
    return fmin_bfgs(self.objective, start, fprime=self.grad, callback=self.debug)
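# Several snippets here (this one, trainEpochs, train_bfgs) pass a callback to
# fmin_bfgs; it is invoked once per iteration with the current parameter vector.
# A hedged sketch that records the iterates of an assumed toy objective:
import numpy as np
from scipy.optimize import fmin_bfgs

history = []

def objective(x):
    return (x[0] - 3.0) ** 2 + (x[1] + 1.0) ** 2

def record(xk):
    # xk is the parameter vector after the current BFGS iteration
    history.append(xk.copy())

x_opt = fmin_bfgs(objective, np.zeros(2), callback=record, disp=False)
print(len(history), "iterations recorded; final point:", x_opt)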
def _cgmin(self, X, Y):
    w = np.copy(self.W)
    res = fmin_bfgs(self._costFunction, w, args=(X, Y),
                    full_output=1, retall=1)
    # With full_output=1 and retall=1 the return tuple is
    # (xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag, allvecs),
    # so res[-2] is the warnflag; grad_calls is res[5].
    self.allvec = res[-1]
    self.W = res[0]
    self.gradEval = res[-2]
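# For reference, a hedged sketch of unpacking what fmin_bfgs returns with
# full_output=1 and retall=1, using an assumed toy least-squares objective:
import numpy as np
from scipy.optimize import fmin_bfgs

def cost(w, X, Y):
    return np.sum((X.dot(w) - Y) ** 2)

X = np.eye(3)
Y = np.array([1.0, 2.0, 3.0])
out = fmin_bfgs(cost, np.zeros(3), args=(X, Y),
                full_output=1, retall=1, disp=False)
xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag, allvecs = out
print(xopt, grad_calls, len(allvecs))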
# The definition of func1 was truncated in this fragment (only its trailing
# "return cost" survived); a plausible reconstruction, marked as an assumption,
# is included below: fmin_bfgs/fmin_cg need a scalar objective, so func1 is
# taken to be the sum-of-squared-residuals cost.
import numpy as np
import pylab as pl
from scipy.optimize import fmin_bfgs, fmin_cg, leastsq

def func1(p, *args):
    # assumed reconstruction: scalar sum-of-squared-residuals objective
    a, b = p
    x, y = args
    cost = np.sum((y - (a * x + b)) ** 2)
    return cost

def func(p, *args):
    # residual vector, as required by leastsq
    a, b = p
    x, y = args
    cost = y - (a * x + b)
    return cost

x = np.arange(1, 10, 1)
y_true = 3 * x + 4
y_mean = y_true + 10 * np.random.rand(len(x))
p0 = np.array([1, 2])
print p0
rs1 = fmin_bfgs(func1, [1, 2], args=(x, y_mean))
rs2 = fmin_cg(func1, [1, 2], args=(x, y_mean))
rs = leastsq(func, p0, args=(x, y_mean))
# rs1 = fmin_bfgs(func, p0, args=(x, y_mean))
print "rs=", rs
# print "rs1=", rs1
print "rs2=", rs2
y1 = rs[0][0] * x + rs[0][1]
y2 = rs1[0] * x + rs1[1]
pl.plot(x, y1, 'r', label="y1")
pl.plot(x, y2, 'b', label="y2")
pl.plot(x, y_mean, 'og', label='y_mean')
def l_bfgs_min():
    print(optimize.fmin_bfgs(f, 0))
# -*- coding: GBK -*-
'''
Created on 2013-11-02

@author: asus
'''
from scipy.optimize.optimize import fmin_bfgs
from numpy.ma.core import zeros


def rosen(x, p1, p2):
    return sum(100.0 * (x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0)


x0 = [1.3, 0.7, 0.8, 1.9, 1.2]
xopt = fmin_bfgs(rosen, x0, args=(1, 1))
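# A hedged note on the args parameter used above: fmin_bfgs forwards the extra
# arguments positionally to both f and fprime on every call, i.e. f(x, *args).
# Minimal sketch with an assumed scaled quadratic:
import numpy as np
from scipy.optimize import fmin_bfgs

def f(x, scale, shift):
    return scale * np.sum((x - shift) ** 2)

def fprime(x, scale, shift):
    return 2.0 * scale * (x - shift)

x_opt = fmin_bfgs(f, np.zeros(3), fprime=fprime, args=(2.0, 1.5), disp=False)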
def train_bfgs(self, xs, ys, iterations=10000, iteration_callback=None,
               validation_xs=None, validation_ys=None, validation_frequency=1,
               regularization=0.01, plot_errors=None):
    """
    Train on data stored in Theano tensors. E.g.
        xs = rng.randn(N, num_features)
        ys = rng.randint(size=N, low=0, high=2)

    iteration_callback is called after each iteration with args
    (iteration, error array).
    """
    compute_grad = theano.function(inputs=[self.x, self.y],
                                   outputs=self.gtheta,
                                   givens={self.reg_coef: regularization})

    # Prepare cost function to optimize and its gradient (jacobian)
    def train_fn(theta):
        self.theta.set_value(theta, borrow=True)
        return self._cost_fn_reg(xs, ys, reg_coef=regularization)

    def train_fn_grad(theta):
        self.theta.set_value(theta, borrow=True)
        return compute_grad(xs, ys)

    # Prepare a callback for between iterations
    best_validation_error = [numpy.inf]
    iteration_counter = [0]
    validation_errors = []
    training_errors = []

    def callback(new_theta):
        # Update the parameters of the model
        self.theta.set_value(new_theta, borrow=True)
        # Only evaluate on val set every validation_frequencyth iteration
        if validation_xs is not None and \
                (iteration_counter[0] + 1) % validation_frequency == 0:
            # Compute accuracy on validation set
            validation_error = self.error(validation_xs, validation_ys)
            # Compute accuracy on training set
            training_error = self.error(xs, ys)
            # Compute how much we've improved on the previous best validation error
            if validation_error < best_validation_error[0]:
                validation_improvement = 0.0
            else:
                validation_improvement = (validation_error - best_validation_error[0]) \
                    / best_validation_error[0] * 100.0
            best_validation_error[0] = validation_error
            # Plot some graphs
            if plot_errors and validation_error is not None:
                validation_errors.append(validation_error)
                training_errors.append(training_error)
                plot_costs(plot_errors,
                           (training_errors, "training set error"),
                           (validation_errors, "val set error"))
        else:
            validation_error = training_error = validation_improvement = None

        if iteration_callback is not None:
            # TODO Compute training cost?
            iteration_callback(iteration_counter[0], 0.0, training_error,
                               validation_error, validation_improvement)
        iteration_counter[0] += 1

    # Call scipy's BFGS optimization function
    fmin_bfgs(train_fn, self.theta.get_value(), fprime=train_fn_grad,
              callback=callback, disp=True, maxiter=iterations)