def compute_vector_lr(syn0_proxy, bins_proxy, neg_words_mult, lbda, word_idxs):
    syn0 = syn0_proxy.get()
    bins = bins_proxy.get()
    counts = defaultdict(lambda: np.zeros(2).astype(np.uint32))
    for widx in word_idxs:
        counts[widx][0] += 1
    neg_words_idxs = sample(bins, int(neg_words_mult * len(word_idxs)))
    for neg_widx in neg_words_idxs:
        counts[neg_widx][1] += 1
    vectors = syn0[counts.keys()]
    count_pairs = np.vstack(counts.values())
    f = lambda w, params=(vectors, count_pairs[:, 0], count_pairs[:, 1], lbda): log_l(w, *params)
    x0 = np.zeros(syn0.shape[1] + 1)
    opt = lbfgsb.fmin_l_bfgs_b(f, x0)
    if opt[2]["warnflag"]:
        logging.debug("Error in optimization: %s", opt[2])
    lr_vec = opt[0].astype(np.float32)
    if not np.all(np.isfinite(lr_vec)):
        logging.info("Error computing lr vector")
        lr_vec[:] = 0
    return lr_vec
def __solver__(self, p):
    # WholeRepr2LinConst(p)  # TODO: remove me
    bounds = []

    # doesn't work in Python ver < 2.5
    # BOUND = lambda x: x if isfinite(x) else None
    def BOUND(x):
        if isfinite(x):
            return x
        else:
            return None

    for i in range(p.n):
        bounds.append((BOUND(p.lb[i]), BOUND(p.ub[i])))

    xf, ff, d = fmin_l_bfgs_b(p.f, p.x0, fprime=p.df, approx_grad=0,
                              bounds=bounds,
                              iprint=p.iprint, maxfun=p.maxFunEvals)

    if d['warnflag'] in (0, 2):
        # if 2 - some problems can be present, but final check from
        # RunProbSolver will set negative istop if solution is unfeasible
        istop = SOLVED_WITH_UNIMPLEMENTED_OR_UNKNOWN_REASON
        if d['warnflag'] == 0:
            msg = 'converged'
    elif d['warnflag'] == 1:
        istop = IS_MAX_FUN_EVALS_REACHED

    p.xk = p.xf = xf
    p.fk = p.ff = ff
    p.istop = istop
    p.iterfcn()
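# A minimal, self-contained sketch (not taken from any of the snippets in this
# collection) showing how the (x, f, d) return tuple of fmin_l_bfgs_b and the
# d['warnflag'] convergence code are typically inspected; the quadratic objective
# and its gradient below are illustrative only.
import numpy as np
from scipy.optimize import fmin_l_bfgs_b


def quad(w):
    # return objective value and gradient together (no fprime, approx_grad=0)
    return float(np.dot(w, w)), 2.0 * w


x, f, d = fmin_l_bfgs_b(quad, np.ones(3))
if d['warnflag'] == 0:
    print('converged:', x, f)
elif d['warnflag'] == 1:
    print('too many function evaluations or iterations')
else:
    print('stopped for another reason:', d['task'])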
def smoothData(self, x, y, weight, nMiss=0):
    '''smooth data'''
    import scipy.optimize.lbfgsb as lbfgsb
    from scipy.fftpack.realtransforms import dct, idct
    n0 = len(x)
    # x = np.array([x,x,x]).flatten()
    # y = np.array([y,y,y]).flatten()
    # weight = np.array([weight,weight,weight]).flatten()
    n = len(x)
    weight = 1. / weight
    # scale 0 to 1
    weight = weight / np.max(weight)
    i = np.arange(1, n + 1)
    eigenvalues = -2. + 2. * np.cos((i - 1) * np.pi / n)
    DCTy = dct(y, norm='ortho', type=2)
    dcty2 = DCTy**2
    eigenvalues2 = eigenvalues**2
    x0 = np.atleast_1d(1.)
    y_hat = np.zeros_like(y)
    xpost, f, d = lbfgsb.fmin_l_bfgs_b(gcv, x0, fprime=None, factr=10.,
                                       approx_grad=True,
                                       args=(y, weight, eigenvalues2, n, nMiss, y_hat))
    solvedGamma = np.exp(xpost)[0]
    return y_hat, solvedGamma
def minimize_lbfgs(self, parameters, x, y):
    parameters2 = parameters.reshape([self.M], order="F")
    # L-BFGS-B minimizer
    result, _, _ = opt2.fmin_l_bfgs_b(self.get_objective, parameters2,
                                      args=[x, y], maxiter=50)
    return result.reshape([-1, 1], order="F")
def minimize_lbfgs(self, parameters, x, y, sigma, emp_counts, classes_idx,
                   nr_x, nr_f, nr_c):
    parameters2 = parameters.reshape([nr_f * nr_c], order="F")
    result, _, d = opt2.fmin_l_bfgs_b(
        self.get_objective, parameters2,
        args=[x, y, sigma, emp_counts, classes_idx, nr_x, nr_f, nr_c])
    return result.reshape([nr_f, nr_c], order="F")
def nls_lbfgs_b(S, D, C_init=None, l1_reg=0.1, max_iter=1000, tol=1e-4,
                callback=None):
    """Non-negative least squares solver using L-BFGS-B."""
    S = ss.csr_matrix(check_array(S, accept_sparse='csr'))
    D = ss.csr_matrix(check_array(D, accept_sparse='csr'))
    n_features = S.shape[0]
    n_components = D.shape[1]
    DtD = safe_sparse_dot(D.T, D)
    DtSD = safe_sparse_dot(D.T, safe_sparse_dot(S, D))

    def f(C, *args):
        C = ss.diags(C)
        tonorm = S - safe_sparse_dot(D, safe_sparse_dot(C, D.T))
        reg = l1_reg * C.diagonal().sum()
        return (0.5 * (ss.linalg.norm(tonorm)**2)) + reg

    def fprime(C, *args):
        C = ss.diags(C)
        DtDCDtD = safe_sparse_dot(DtD, safe_sparse_dot(C, DtD))
        reg = l1_reg * ss.eye(C.shape[0])
        full = DtDCDtD - DtSD + reg
        return full.diagonal()

    if C_init is None:
        C = np.zeros(n_components, dtype=np.float64)
    elif C_init.shape == (n_features, n_features):
        C = np.diag(C_init)
    else:
        C = C_init

    C, residual, d = fmin_l_bfgs_b(
        f,
        x0=C,
        fprime=fprime,
        pgtol=tol,
        bounds=[(0, None)] * n_components,
        maxiter=max_iter,
        callback=callback,
    )

    # testing reveals that sometimes, very small negative values occur
    C[C < 0] = 0
    if l1_reg:
        residual -= l1_reg * C.sum()
    residual = np.sqrt(2 * residual)
    if d['warnflag'] > 0:
        print("L-BFGS-B failed to converge")
    return C, residual
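# Hypothetical usage sketch for the nls_lbfgs_b solver above; it is not part of the
# original snippet and assumes the module-level imports that snippet relies on
# (numpy as np, scipy.sparse as ss, sklearn's check_array / safe_sparse_dot, and
# scipy.optimize's fmin_l_bfgs_b). S is approximated by D @ diag(C) @ D.T with C >= 0;
# the shapes and random data below are illustrative only.
import numpy as np

rng = np.random.RandomState(0)
D = np.abs(rng.randn(20, 5))        # dictionary: n_features x n_components
C_true = np.abs(rng.rand(5))        # non-negative diagonal coefficients
S = D @ np.diag(C_true) @ D.T       # square target matrix

C_est, residual = nls_lbfgs_b(S, D, l1_reg=0.0, max_iter=500)
print(C_est, residual)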
def train(self, w0, debug=False):
    if debug:
        iprint = 0
    else:
        iprint = -1
    x, f, d = fmin_l_bfgs_b(self.objective, w0, fprime=self.grad,
                            pgtol=1e-09, iprint=iprint)
    if d["warnflag"] != 0:
        raise OptimisationException(d["task"])
    return x
def solve_l1l1_approx(X, y, lbda):
    make_l1l1_approx()
    f = lambda w, params=(X, y, lbda): l1l1_approx(w, *params)
    x0 = np.zeros(X.shape[1] + 1)
    opt = lbfgsb.fmin_l_bfgs_b(f, x0, bounds=[(0, None)] * x0.shape[0])
    logging.debug(opt[2])
    return opt[0].astype(np.float32)
def find2(self, POIMobj, motif_len, motif_start, base, path2pwm=None, solver="NLP"):
    self.motif_start = motif_start
    self.motif_len = motif_len
    x0 = tools.ini_pwm(motif_len, 1, len(base))[0]
    x0 = x0.flatten()
    lb = np.ones(x0.shape) * 0.001
    ub = np.ones(x0.shape) * 0.999
    iprint = 0
    maxIter = 1000
    ftol = 1e-04
    gradtol = 1e-03
    diffInt = 1e-05
    contol = 1e-02
    maxFunEvals = 1e04
    maxTime = 100
    lenA = int(len(x0))
    lenk = int(len(x0)) // len(base)
    Aeq = np.zeros((lenk, lenA))
    beq = np.ones(lenk)
    for i in range(lenk):
        for pk in range(i, lenA, lenk):
            Aeq[i, pk] = 1
    # ,Aeq=Aeq,beq=beq,
    cons = {'type': 'eq', 'fun': lambda x: np.dot(Aeq, x) - beq}
    bnds = []
    for i in range(len(x0)):
        bnds.append((lb[i], ub[i]))
    # bnds = np.vstack((lb,ub))
    if solver == "ralg":
        from openopt import NLP
        p = NLP(self.f_L2, x0, lb=lb, ub=ub, Aeq=Aeq, beq=beq,
                args=(POIMobj.gPOIM, POIMobj.L, motif_start, POIMobj.small_k, motif_len),
                diffInt=diffInt, ftol=ftol, plot=0, iprint=iprint, maxIter=maxIter,
                maxFunEvals=maxFunEvals, show=False, contol=contol)
        result = p._solve(solver)
        x = result.xf
        f = result.ff
    elif solver == "LBFGSB":
        x, f, d = fmin_l_bfgs_b(self.f_L2, x0,
                                args=(POIMobj.gPOIM, POIMobj.L, motif_start,
                                      POIMobj.small_k, motif_len),
                                approx_grad=True)  # constraints=cons
    elif solver == "SLSQP":
        result = minimize(self.f_L2, x0,
                          args=(POIMobj.gPOIM, POIMobj.L, motif_start,
                                POIMobj.small_k, motif_len),
                          method='SLSQP', bounds=bnds, constraints=cons)
        x = result.x
        f = result.fun
    self.motif_pwm = np.reshape(x, (4, motif_len))
    fopt = f
    self.normalize()
    if path2pwm is not None:
        np.savetxt(path2pwm, self.poim_norm)
    return self.motif_pwm
def Run(self):
    self.iteration = 0
    self.ma.initHistory()
    # print 'point1'
    (self.xopt, f, d) = fmin_l_bfgs_b(self.Objective, self.vl0, approx_grad=1,
                                      bounds=self.bounds, m=self.m,
                                      factr=self.factr, pgtol=self.pgtol,
                                      epsilon=self.epsilon, maxfun=self.maxfun)
    # print 'point2'
    self.Objective(self.xopt)
def train_lmbfgs(self):
    """
    Train the model by maximising the posterior with L-BFGS.

    The training data should have been set at this stage:
        >> h = hcrf(H, maxw, maxf)
        >> h.X = X
        >> h.Y = Y
        >> h.lamb = lamb
        >> final_params = h.train_lmbfgs()

    Return the final parameter vector.
    """
    initial = self.param[self.param_non_inf_indexes]
    fparam = fmin_l_bfgs_b(self.get_obj, initial)
    return fparam
def Run(self):
    self.iteration = 0
    self.ma.initHistory()
    # print 'point1'
    vl0 = self.NormX(self.vl0)
    # print 'vl0=',self.vl0
    if self.method == 'Opt_1D':
        (xa, xb) = self.bounds[0]
        # print 'xa=',xa,' xb=',xb
        xa = self.NormX([xa])
        xb = self.NormX([xb])
        self.xopt = fminbound(self.Objective1D, xa[0], xb[0],
                              xtol=self.xtol, maxfun=self.maxfun)
        self.Objective1D(self.xopt)
        return
    elif self.method == 'NelderMead':
        self.xopt = fmin(self.Objective, vl0, xtol=self.xtol,
                         ftol=self.ftol, maxfun=self.maxfun)
    elif self.method == 'Powell':
        self.xopt = fmin_powell(self.Objective, vl0, xtol=self.xtol,
                                ftol=self.ftol, maxfun=self.maxfun)
    elif self.method == 'LBFGSB':
        bounds = self.NormBounds(self.bounds)
        (self.xopt, f, d) = fmin_l_bfgs_b(self.Objective, vl0, approx_grad=1,
                                          bounds=bounds, epsilon=self.epsilon,
                                          maxfun=self.maxfun)
    elif self.method == 'TNC':
        bounds = self.NormBounds(self.bounds)
        (self.xopt, f, d) = fmin_tnc(self.Objective, vl0, approx_grad=1,
                                     bounds=bounds, ftol=self.ftol, xtol=self.xtol,
                                     epsilon=self.epsilon, maxfun=self.maxfun)
    elif self.method == 'Anneal':
        (lower, upper) = self.NormBoundsAnneal(self.bounds)
        (self.xopt, r) = anneal(self.Objective, vl0, schedule=self.schedule,
                                maxeval=self.maxfun, feps=self.ftol,
                                lower=lower, upper=upper)
    elif self.method == 'Cobyla':
        self.isCobyla = 1
        self.ce = self.CreateBounds(self.vl, self.fce)
        self.xopt = self.fmin_cobyla(self.Objective, vl0, self.ce,
                                     rhobeg=self.rhobeg, rhoend=self.rhoend,
                                     maxfun=self.maxfun)
    else:
        print
        print 'Optimization Error: method ', self.method, 'is absent'
        print
        return
    # print 'point2'
    self.Objective(self.xopt)
def optimization_layer(result, iprint=-1):
    """
    Implementation of the Optimization layer.

    It uses L-BFGS [1] as a special case of L-BFGS-B [2] in scipy.optimize.
    The result object is modified to yield the optimal BEModel.
    A sub-dictionary with additional information is added under the key
    result.additional['Opt'].

    [1] D.C. Liu and J. Nocedal, ``On the Limited Memory Method for Large
        Scale Optimization'', Math. Prog. B 45 (3), pp.~503--528, 1989.
        DOI 10.1007/BF01589116
    [2] C. Zhu, R.H. Byrd and J. Nocedal, ``Algorithm 778: L-BFGS-B: Fortran
        subroutines for large-scale bound-constrained optimization'',
        ACM Trans. Math. Software 23 (4), pp.~550--560, 1997.
        DOI 10.1145/279232.279236

    Parameters
    ----------
    result : object
        A valid :py:class:`cobea.model.Result` object. The object is modified
        during processing; the model variables are set to their optimal values.
    iprint : int
        (Optional) verbosity of fmin_l_bfgs_b. Default: -1

    Returns
    -------
    result : object
        Identical to input object.
    """
    x = result._to_statevec()
    print('Optimization layer: running with %i model parameters...' % result.ndim)
    xopt, fval, optimizer_dict = fmin_l_bfgs_b(result._gradient, x,
                                               args=(result.input_matrix, ),
                                               iprint=iprint, maxiter=int(2e4),
                                               factr=100)
    print('    ...finished with %i gradient (L-BFGS) iterations.' % optimizer_dict['nit'])
    print('    chi^2 = %.3e (%s)^2' % (fval, result.unit))
    result._from_statevec(xopt)
    result.additional['Opt'] = optimizer_dict
    return result
def _fit_inner(self, X, y, activations, deltas, coef_grads, intercept_grads,
               layer_units):
    # Store meta information for the parameters
    self._coef_indptr = []
    self._intercept_indptr = []
    start = 0

    # Save sizes and indices of coefficients for faster unpacking
    for i in range(self.n_layers_ - 1):
        n_fan_in, n_fan_out = layer_units[i], layer_units[i + 1]
        end = start + (n_fan_in * n_fan_out)
        self._coef_indptr.append((start, end, (n_fan_in, n_fan_out)))
        start = end

    # Save sizes and indices of intercepts for faster unpacking
    for i in range(self.n_layers_ - 1):
        end = start + layer_units[i + 1]
        self._intercept_indptr.append((start, end))
        start = end

    # Run LBFGS
    packed_coef_inter = self._pack(self.coefs_, self.intercepts_)

    if self.verbose is True or self.verbose >= 1:
        iprint = 1
    else:
        iprint = -1

    optimal_parameters, self.loss_, d = fmin_l_bfgs_b(
        x0=packed_coef_inter,
        func=self._loss_grad_lbfgs,
        maxfun=self.max_iter,
        iprint=iprint,
        pgtol=self.tol,
        args=(X, y, activations, deltas, coef_grads, intercept_grads))

    self._unpack(optimal_parameters)
def train(self, debug=False):
    """Train the mixture model."""
    if debug:
        iprint = 0
    else:
        iprint = -1
    # Initialise weights to zero, except interpolation
    num_phrase_features = self.phrase_index[1] - self.phrase_index[0]
    num_models = ((self.interp_index[1] - self.interp_index[0]) / num_phrase_features) + 1
    w0 = [0.0] * self.interp_index[0]
    w0 += [1.0 / num_models] * (self.interp_index[1] - self.interp_index[0])
    bounds = [(None, None)] * len(w0)
    bounds[self.interp_index[0]:self.interp_index[1]] = \
        [(self.interp_floor, 1)] * (self.interp_index[1] - self.interp_index[0])
    w0 = np.array(w0)
    x, f, d = fmin_l_bfgs_b(self.objective, w0, fprime=self.gradient,
                            bounds=bounds, pgtol=1e-09, iprint=iprint)
    if d['warnflag'] != 0:
        raise OptimisationException(d['task'])
    weights = x[:self.interp_index[0]]
    mix_weights = x[self.interp_index[0]:]
    mix_weights = mix_weights.reshape((num_models - 1, num_phrase_features))
    mix_weights = np.vstack((mix_weights, 1 - np.sum(mix_weights, axis=0)))
    return weights, mix_weights
def fit(self, X, y):
    """Fit the model according to the given training data.

    Parameters
    ----------
    X : List of list of ints. Each list of ints represents a training example.
        Each int in that list is the index of a one-hot encoded feature.
    y : array-like, shape (n_samples,)
        Target vector relative to X.

    Returns
    -------
    self : object
        Returns self.
    """
    classes = list(set(y))
    num_classes = len(classes)
    self.classes_ = classes
    if self.transitions is None:
        self.transitions = self._create_default_transitions(num_classes, self.num_states)

    # Initialise the parameters
    _, num_features = X[0].shape
    num_transitions, _ = self.transitions.shape
    numpy.random.seed(self._random_seed)
    if self.state_parameters is None:
        self.state_parameters = numpy.random.standard_normal(
            (num_features, self.num_states, num_classes)) * self.state_parameter_noise
    if self.transition_parameters is None:
        self.transition_parameters = numpy.random.standard_normal(
            (num_transitions)) * self.transition_parameter_noise

    initial_parameter_vector = self._stack_parameters(self.state_parameters,
                                                      self.transition_parameters)
    function_evaluations = [0]

    def objective_function(parameter_vector, batch_start_index=0, batch_end_index=-1):
        ll = 0.0
        gradient = numpy.zeros_like(parameter_vector)
        state_parameters, transition_parameters = self._unstack_parameters(parameter_vector)
        for x, ty in zip(X, y)[batch_start_index:batch_end_index]:
            y_index = classes.index(ty)
            dll, dgradient_state, dgradient_transition = log_likelihood(
                x, y_index, state_parameters, transition_parameters, self.transitions)
            dgradient = self._stack_parameters(dgradient_state, dgradient_transition)
            ll += dll
            gradient += dgradient

        # exclude the bias parameters from being regularized
        parameters_without_bias = numpy.array(parameter_vector)
        parameters_without_bias[0] = 0
        ll -= self.l2_regularization * numpy.dot(parameters_without_bias.T,
                                                 parameters_without_bias)
        gradient = gradient.flatten() - 2.0 * self.l2_regularization * parameters_without_bias

        if batch_start_index == 0:
            function_evaluations[0] += 1
            if self._verbosity > 0 and function_evaluations[0] % self._verbosity == 0:
                print '{:10} {:10.2f} {:10.2f}'.format(function_evaluations[0], ll, sum(abs(gradient)))
        return -ll, -gradient

    # If the stochastic gradient stepsize is defined, do 1 epoch of SGD to
    # initialize the parameters.
    if self._sgd_stepsize:
        total_nll = 0.0
        for i in range(len(y)):
            nll, ngradient = objective_function(initial_parameter_vector, i, i + 1)
            total_nll += nll
            initial_parameter_vector -= ngradient * self._sgd_stepsize
            if self._sgd_verbosity > 0:
                if i % self._sgd_verbosity == 0:
                    print '{:10} {:10.2f} {:10.2f}'.format(i, -total_nll / (i + 1) * len(y), sum(abs(ngradient)))

    self._optimizer_result = fmin_l_bfgs_b(objective_function, initial_parameter_vector,
                                           **self.optimizer_kwargs)
    self.state_parameters, self.transition_parameters = self._unstack_parameters(
        self._optimizer_result[0])
    return self
def learn(data, p_init, p_bounds, parametrisation, A, reg_func, loss, penalty,
          params, track=False):
    if track:
        tracker = ObjectiveTracker(params, parametrisation, print_to_stdout=True)

        @tracker
        def obj(p, data, params):
            f_data, f_pen, g = obj_func_general_parametrisation(
                p, data, parametrisation, A, reg_func, loss, penalty, params)
            return f_data, f_pen, g
    else:
        counter = 0

        def callback(p):
            nonlocal counter
            counter += 1
            S, alpha, eps = parametrisation(torch.tensor(p), params)
            S = S.reshape(-1).cpu().numpy()
            alpha = alpha.cpu().numpy()
            print('\nIteration #{}: Current sampling rate {:.1f}%, alpha {:.2e}, eps {:.2e}'
                  .format(counter, np.mean(S > 0) * 100, alpha.item(), eps.item()))

        def obj(p, data, params):
            f_data, f_pen, g = obj_func_general_parametrisation(
                p, data, parametrisation, A, reg_func, loss, penalty, params)
            return f_data + f_pen, g

    start_time = datetime.datetime.now()

    if 'pgtol' in params['alg_params']['LBFGSB']:
        pgtol = params['alg_params']['LBFGSB']['pgtol']
    else:
        pgtol = 1e-10
    if 'maxit' in params['alg_params']['LBFGSB']:
        maxiter = params['alg_params']['LBFGSB']['maxit']
    else:
        maxiter = 1000

    print('Learning sampling pattern:')
    p, _, info = fmin_l_bfgs_b(
        lambda p: obj(p, data, params),
        p_init,
        bounds=p_bounds,
        pgtol=pgtol,
        factr=0,
        maxiter=maxiter,
        callback=tracker.callback if track else callback)
    end_time = datetime.datetime.now()
    elapsed_time = end_time - start_time

    results = {'elapsed_time': elapsed_time, 'p': p, 'info': info}
    if track:
        results['tracker'] = tracker
    return results
def smoothn(y, nS0=10, axis=None, smoothOrder=2.0, sd=None, verbose=False, s0=None, z0=None, isrobust=False, w=None, s=None, max_iter=100, tol_z=1e-3, weightstr='bisquare'): """ Robust spline smoothing for 1-D to n-D data. SMOOTHN provides a fast, automatized and robust discretized smoothing spline for data of any dimension. Parameters ---------- y : numpy array or numpy masked array The data to be smoothed. nS0 : int, optional The number of samples to use when estimating the smoothing parameter. Default value is 10. smoothOrder : float, optional The polynomial order to smooth the function to. Default value is 2.0. sd : numpy array, optional Weighting of the data points in standard deviation format. Deafult is to not weight by standard deviation. verbose : { True, False }, optional Create extra logging during operation. s0 : float, optional Initial value of the smoothing parameter. Defaults to no value, being instead derived from calculation. z0 : float, optional Initial estimate of the smoothed data. isrobust : { False, True } Whether the smoothing applies the robust smoothing algorithm. This allows the smoothing to ignore outlier data without creating large spikes to fit the data. w : numpy array, optional Linear wighting to apply to the data. Default is to assume no linear weighting. s : float Initial smoothing parameter. Default is to calculate a value. max_iter : int, optional The maximum number of iterations to attempt the smoothing. Default is 100 iterations. tol_z: float, optional Tolerance at which the smoothing will be considered converged. Default value is 1e-3 weightstr : { 'bisquare', 'cauchy', 'talworth'}, optional The type of weighting applied to the data when performing robust smoothing. Returns ------- (z, s, exitflag) A tuple of the returned results. z : numpy array The smoothed data. s : float The value of the smoothing parameter used to perform this smoothing. exitflag : {0, -1} A return flag of 0 indicates successfuly execution, -1 an error (see the log). Notes ----- Z = SMOOTHN(Y) automatically smoothes the uniformly-sampled array Y. Y can be any n-D noisy array (time series, images, 3D data,...). Non finite data (NaN or Inf) are treated as missing values. Z = SMOOTHN(Y,S) smoothes the array Y using the smoothing parameter S. S must be a real positive scalar. The larger S is, the smoother the output will be. If the smoothing parameter S is omitted (see previous option) or empty (i.e. S = []), it is automatically determined using the generalized cross-validation (GCV) method. Z = SMOOTHN(Y,w) or Z = SMOOTHN(Y,w,S) specifies a weighting array w of real positive values, that must have the same size as Y. Note that a nil weight corresponds to a missing value. Robust smoothing ---------------- Z = SMOOTHN(...,'robust') carries out a robust smoothing that minimizes the influence of outlying data. [Z,S] = SMOOTHN(...) also returns the calculated value for S so that you can fine-tune the smoothing subsequently if needed. An iteration process is used in the presence of weighted and/or missing values. Z = SMOOTHN(...,OPTION_NAME,OPTION_VALUE) smoothes with the termination parameters specified by OPTION_NAME and OPTION_VALUE. They can contain the following criteria: ----------------- tol_z: Termination tolerance on Z (default = 1e-3) tol_z must be in ]0,1[ max_iter: Maximum number of iterations allowed (default = 100) Initial: Initial value for the iterative process (default = original data) ----------------- Syntax: [Z,...] 
= SMOOTHN(...,'max_iter',500,'tol_z',1e-4,'Initial',Z0); [Z,S,EXITFLAG] = SMOOTHN(...) returns a boolean value EXITFLAG that describes the exit condition of SMOOTHN: 1 SMOOTHN converged. 0 Maximum number of iterations was reached. Class Support ------------- Input array can be numeric or logical. The returned array is of class double. Notes ----- The n-D (inverse) discrete cosine transform functions <a href="matlab:web('http://www.biomecardio.com/matlab/dctn.html')" >DCTN</a> and <a href="matlab:web('http://www.biomecardio.com/matlab/idctn.html')" >IDCTN</a> are required. To be made ---------- Estimate the confidence bands (see Wahba 1983, Nychka 1988). Reference --------- Garcia D, Robust smoothing of gridded data in one and higher dimensions with missing values. Computational Statistics & Data Analysis, 2010 <a href="matlab:web('http://www.biomecardio.com/pageshtm/publi/csda10.pdf')" >PDF download</a> Examples: -------- # 1-D example x = linspace(0,100,2**8); y = cos(x/10)+(x/50)**2 + randn(size(x))/10; y[[70, 75, 80]] = [5.5, 5, 6]; z = smoothn(y); # Regular smoothing zr = smoothn(y,'robust'); # Robust smoothing subplot(121), plot(x,y,'r.',x,z,'k','LineWidth',2) axis square, title('Regular smoothing') subplot(122), plot(x,y,'r.',x,zr,'k','LineWidth',2) axis square, title('Robust smoothing') # 2-D example xp = 0:.02:1; [x,y] = meshgrid(xp); f = exp(x+y) + sin((x-2*y)*3); fn = f + randn(size(f))*0.5; fs = smoothn(fn); subplot(121), surf(xp,xp,fn), zlim([0 8]), axis square subplot(122), surf(xp,xp,fs), zlim([0 8]), axis square # 2-D example with missing data n = 256; y0 = peaks(n); y = y0 + rand(size(y0))*2; I = randperm(n^2); y(I(1:n^2*0.5)) = NaN; # lose 1/2 of data y(40:90,140:190) = NaN; # create a hole z = smoothn(y); # smooth data subplot(2,2,1:2), imagesc(y), axis equal off title('Noisy corrupt data') subplot(223), imagesc(z), axis equal off title('Recovered data ...') subplot(224), imagesc(y0), axis equal off title('... compared with original data') # 3-D example [x,y,z] = meshgrid(-2:.2:2); xslice = [-0.8,1]; yslice = 2; zslice = [-2,0]; vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06; subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic') title('Noisy data') v = smoothn(vn); subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic') title('Smoothed data') # Cardioid t = linspace(0,2*pi,1000); x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1; y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1; z = smoothn(complex(x,y)); plot(x,y,'r.',real(z),imag(z),'k','linewidth',2) axis equal tight # Cellular vortical flow [x,y] = meshgrid(linspace(0,1,24)); Vx = cos(2*pi*x+pi/2).*cos(2*pi*y); Vy = sin(2*pi*x+pi/2).*sin(2*pi*y); Vx = Vx + sqrt(0.05)*randn(24,24); # adding Gaussian noise Vy = Vy + sqrt(0.05)*randn(24,24); # adding Gaussian noise I = randperm(numel(Vx)); Vx(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers Vy(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers Vx(I(31:60)) = NaN; # missing values Vy(I(31:60)) = NaN; # missing values Vs = smoothn(complex(Vx,Vy),'robust'); # automatic smoothing subplot(121), quiver(x,y,Vx,Vy,2.5), axis square title('Noisy velocity field') subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square title('Smoothed velocity field') See also SMOOTH, SMOOTH3, DCTN, IDCTN. 
-- Damien Garcia -- 2009/03, revised 2010/11 Visit my <a href="matlab:web('http://www.biomecardio.com/matlab/smoothn.html') >website</a> for more details about SMOOTHN # Check input arguments error(nargchk(1,12,nargin)); z0=None,w=None,s=None,max_iter=100,tol_z=1e-3 """ (y, w) = preprocessing(y, w, sd) sizy = y.shape # sort axis if axis is None: axis = tuple(np.arange(y.ndim)) noe = y.size # number of elements if noe < 2: return y, s, EXIT_SUCCESS, W_TOT_DEFAULT # --- # "Weighting function" criterion weightstr = weightstr.lower() # --- # Weights. Zero weights are assigned to not finite values (Inf or NaN), # (Inf/NaN values = missing data). is_finite = np.isfinite(y) nof = np.sum(is_finite) # number of finite elements # --- # Weighted or missing data? isweighted = np.any(w != 1) # --- # Automatic smoothing? isauto = not s # Creation of the Lambda tensor lambda_ = define_lambda(y, axis) # Upper and lower bound for the smoothness parameter s_min_bnd, s_max_bnd = smoothness_bounds(y) # Initialize before iterating y_tensor_rank = np.sum(np.array(sizy) != 1) # tensor rank of the y-array # --- w_tot = w # --- Initial conditions for z z = initial_z(y, z0, isweighted) # --- z0 = z y[~is_finite] = 0 # arbitrary values for missing y-data # --- tol = 1.0 robust_iterative_process = True robust_step = 1 nit = 0 # --- Error on p. Smoothness parameter s = 10^p errp = 0.1 # opt = optimset('TolX',errp); # --- Relaxation factor relaxation_factor: to speedup convergence relaxation_factor = 1 + 0.75 * isweighted # ?? # Main iterative process # --- xpost = init_xpost(s, s_min_bnd, s_max_bnd, isauto) while robust_iterative_process: # --- "amount" of weights (see the function GCVscore) aow = np.sum(w_tot) / noe # 0 < aow <= 1 # --- while tol > tol_z and nit < max_iter: if verbose: LOG.info(f"tol {tol:s} nit {nit:s}") nit = nit + 1 dct_y = dctND(w_tot * (y - z) + z, f=dct) if isauto and not np.remainder(np.log2(nit), 1): # --- # The generalized cross-validation (GCV) method is used. # We seek the smoothing parameter s that minimizes the GCV # score i.e. s = Argmin(GCVscore). # Because this process is time-consuming, it is performed from # time to time (when nit is a power of 2) # --- # errp in here somewhere # bounds = [(log10(s_min_bnd),log10(s_max_bnd))] # args = (lambda_, aow,dct_y,is_finite,w_tot,y,nof,noe) # xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None, # factr=10., approx_grad=True,bounds=,bounds\ # args=args) # if we have no clue what value of s to use, better span the # possible range to get a reasonable starting point ... # only need to do it once though. 
nS0 is the number of samples # used if not s0: ss = np.arange(nS0) * (1.0 / (nS0 - 1.0)) * (np.log10( s_max_bnd) - np.log10(s_min_bnd)) + np.log10(s_min_bnd) g = np.zeros_like(ss) for i, p in enumerate(ss): g[i] = gcv(p, lambda_, aow, dct_y, is_finite, w_tot, y, nof, noe, smoothOrder) xpost = [ss[g == g.min()]] else: xpost = [s0] bounds = [(np.log10(s_min_bnd), np.log10(s_max_bnd))] args = (lambda_, aow, dct_y, is_finite, w_tot, y, nof, noe, smoothOrder) xpost, _, _ = lbfgsb.fmin_l_bfgs_b(gcv, xpost, fprime=None, factr=1e7, approx_grad=True, bounds=bounds, args=args) s = 10**xpost[0] # update the value we use for the initial s estimate s0 = xpost[0] gamma = gamma_from_lambda(lambda_, s, smoothOrder) z = relaxation_factor * dctND(gamma*dct_y, f=idct) +\ (1 - relaxation_factor) * z # if no weighted/missing data => tol=0 (no iteration) tol = isweighted * norm(z0 - z) / norm(z) z0 = z # re-initialization exitflag = nit < max_iter if isrobust: # -- Robust Smoothing: iteratively re-weighted process # --- average leverage h = np.sqrt(1 + 16.0 * s) h = np.sqrt(1 + h) / np.sqrt(2) / h h = h**y_tensor_rank # --- take robust weights into account w_tot = w * robust_weights(y - z, is_finite, h, weightstr) # --- re-initialize for another iterative weighted process isweighted = True tol = 1 nit = 0 # --- robust_step = robust_step + 1 # 3 robust steps are enough. robust_iterative_process = robust_step < 3 else: robust_iterative_process = False # stop the whole process # Warning messages # --- if isauto: limit = "" if np.abs(np.log10(s) - np.log10(s_min_bnd)) < errp: limit = "lower" elif np.abs(np.log10(s) - np.log10(s_max_bnd)) < errp: limit = "upper" warning(f"smoothn:S{limit.capitalize()}Bound", [ f"s = {s:.3f}: the {limit} bound for s has been reached. " + "Put s as an input variable if required." ]) return z, s, exitflag, w_tot
'''
from scipy.optimize.lbfgsb import fmin_l_bfgs_b

bou = np.array([[-1, 1], [0, 1]])


def f(x):
    x_1 = x[0]
    x_2 = x[1]
    return np.sin(x_1 + x_2) * x_1


print(len(bou))
x0 = [0.2, 0.3]
x_1, f_1, d_1 = fmin_l_bfgs_b(f, x0, bounds=bou, maxfun=1500, approx_grad=True)
print(x_1)
print(f_1)
print(d_1)
'''


def f_d(x_1, x_2):
    return pdist(np.vstack([x_1, x_2]))


dis = [f_d(x[i], y) for i in range(x.shape[0])]
print(dis)
def optimize(self, parallel=False, parallel_verbose=0, **kwargs): # -- Getting parameters -- # ------------------------ #Upper level inputs mask_type = kwargs.get("mask_type", "") learn_mask = kwargs.get("learn_mask", True) learn_alpha = kwargs.get("learn_alpha", True) l0 = kwargs.get("l0", None) p0 = kwargs.get("p0", None) shots = False # -- Checking inputs -- if mask_type in ["cartesian", "radial_CO"]: if l0 is None: raise ValueError( "an initial mask parametrisation l0 must be given") shots = True else: if p0 is None: raise ValueError("an initial mask p0 must be given") t1 = time.time() self.niter = 0 # -- Initializing -- # ------------------ print("Multithread:", parallel) if shots: n = len(l0) - 1 self.alphas = [l0[-1]] else: n = len(p0) - 1 self.alphas = [p0[-1]] self.energy_upper = [ E(lk=l0, pk=p0, mask_type=mask_type, images=self.images, kspace_data=self.kspace_data, samples=self.samples, wavelet_name=self.wavelet_name, wavelet_scale=self.wavelet_scale, param=self.param, verbose=self.verbose, const=self.const, n_rad=self.n_rad, parallel=parallel) ] # -- Using L-BFGS-B -- # -------------------- if shots: #Optimize l lf, _, _ = fmin_l_bfgs_b( lambda x: E(lk=x, mask_type=mask_type, images=self.images, kspace_data=self.kspace_data, samples=self.samples, wavelet_name=self.wavelet_name, wavelet_scale=self.wavelet_scale, param=self.param, verbose=self.verbose, const=self.const, n_rad=self.n_rad, parallel=parallel, parallel_verbose=parallel_verbose), l0, lambda x: grad_E(lk=x, mask_type=mask_type, images=self.images, kspace_data=self.kspace_data, samples=self.samples, wavelet_name=self.wavelet_name, wavelet_scale=self.wavelet_scale, param=self.param, verbose=self.verbose, const=self.const, n_rad=self.n_rad, learn_mask=learn_mask, learn_alpha=learn_alpha, parallel=parallel, parallel_verbose=parallel_verbose), bounds=[(0, 1)] * n + [(1e-10, np.inf)], pgtol=self.pgtol, maxfun=self.maxfun, maxiter=self.maxiter, maxls=2, callback=lambda x: self.fcall(x, mask_type)) else: #Optimize p directly pf, _, _ = fmin_l_bfgs_b( lambda x: E(pk=x, mask_type=mask_type, images=self.images, kspace_data=self.kspace_data, samples=self.samples, wavelet_name=self.wavelet_name, wavelet_scale=self.wavelet_scale, param=self.param, verbose=self.verbose, const=self.const, n_rad=self.n_rad, parallel=parallel, parallel_verbose=parallel_verbose), p0, lambda x: grad_E(pk=x, mask_type=mask_type, images=self.images, kspace_data=self.kspace_data, samples=self.samples, wavelet_name=self.wavelet_name, wavelet_scale=self.wavelet_scale, param=self.param, verbose=self.verbose, const=self.const, n_rad=self.n_rad, learn_mask=learn_mask, learn_alpha=learn_alpha, parallel=parallel, parallel_verbose=parallel_verbose), bounds=[(0, 1)] * n + [(1e-10, np.inf)], pgtol=self.pgtol, maxfun=self.maxfun, maxiter=self.maxiter, maxls=2, callback=lambda x: self.fcall(x, mask_type)) # -- Returning output -- # ---------------------- print("\033[1m" + f"\nFINISHED IN {time.time()-t1} SECONDS\n" + "\033[0m") if shots: return lf, self.energy_upper, self.alphas else: return pf, self.energy_upper, self.alphas
def train(): np.random.seed(131742) #get sentences, trees and labels nExamples = -1 print "loading data.." rnnData = RNNDataCorpus() rnnData.load_data(load_file=config.train_data, nExamples=nExamples) #initialize params print "initializing params" params = Params(data=rnnData, wordSize=50, rankWo=2) #define theta #one vector for all the parameters of mvrnn model: W, Wm, Wlabel, L, Lm n = params.wordSize fanIn = params.fanIn nWords = params.nWords nLabels = params.categories rank = params.rankWo Wo = 0.01 * np.random.randn(n + 2 * n * rank, nWords) #Lm, as in paper Wo[:n, :] = np.ones((n, Wo.shape[1])) #Lm, as in paper Wcat = 0.005 * np.random.randn(nLabels, fanIn) #Wlabel, as in paper # Wv = 0.01*np.random.randn(n, nWords) # WO = 0.01*np.random.randn(n, 2*n) # W = 0.01*np.random.randn(n, 2*n+1) #load pre-trained weights here mats = sio.loadmat(config.pre_trained_weights) Wv = mats.get('Wv') #L, as in paper W = mats.get('W') #W, as in paper WO = mats.get('WO') #Wm, as in paper sentencesIdx = np.arange(rnnData.ndoc()) np.random.shuffle(sentencesIdx) nTrain = 4 * len(sentencesIdx) / 5 trainSentIdx = sentencesIdx[0:nTrain] testSentIdx = sentencesIdx[nTrain:] batchSize = 5 nBatches = len(trainSentIdx) / batchSize evalFreq = 5 #evaluate after every 5 minibatches nTestSentEval = 50 #number of test sentences to be evaluated rnnData_train = RNNDataCorpus() rnnData.copy_into_minibatch(rnnData_train, trainSentIdx) rnnData_test = RNNDataCorpus() if (len(testSentIdx) > nTestSentEval): # np.random.shuffle(testSentIdx) #choose random test examples thisTestSentIdx = testSentIdx[:nTestSentEval] else: thisTestSentIdx = testSentIdx rnnData.copy_into_minibatch(rnnData_test, thisTestSentIdx) # [Wv_test, Wo_test, _] = getRelevantWords(rnnData_test, Wv,Wo,params) [Wv_trainTest, Wo_trainTest, all_train_idx ] = getRelevantWords(rnnData, Wv, Wo, params) #sets nWords_reduced, returns new arrays theta = np.concatenate((W.flatten(), WO.flatten(), Wcat.flatten(), Wv_trainTest.flatten(), Wo_trainTest.flatten())) #optimize print "starting training..." nIter = 100 rnnData_minibatch = RNNDataCorpus() for i in range(nIter): #train in minibatches # ftrain = np.zeros(nBatches) # for ibatch in range(nBatches): # set_minibatch(rnnData, rnnData_minibatch, ibatch, nBatches, trainSentIdx) # print 'Iteration: ', i, ' minibatch: ', ibatch tunedTheta, fbatch_train, _ = lbfgsb.fmin_l_bfgs_b( func=costFn, x0=theta, fprime=None, args=(rnnData_train, params), approx_grad=0, bounds=None, m=5, factr=1000000000000000.0, pgtol=1.0000000000000001e-5, epsilon=1e-08, iprint=3, maxfun=1, disp=0) #map parameters back W[:, :], WO[:, :], Wcat[:, :], Wv_trainTest, Wo_trainTest = unroll_theta( tunedTheta, params) Wv[:, all_train_idx] = Wv_trainTest Wo[:, all_train_idx] = Wo_trainTest # ftrain[ibatch] = fbatch_train theta = tunedTheta #for next iteration print "========================================" print "XXXXXXIteration ", i, print "Average cost: ", np.average(fbatch_train) evaluate(Wv, Wo, W, WO, Wcat, params, rnnData_test) print "========================================" #save weights save_dict = {'Wv': Wv, 'Wo': Wo, 'Wcat': Wcat, 'W': W, 'WO': WO} sio.savemat(config.saved_params_file + '_lbfgs_iter' + str(i), mdict=save_dict) print "saved tuned theta. "
def learn(self, X, y): """ Learn the model from the given data. :param X: the attribute data :type X: numpy.array :param y: the class variable data :type y: numpy.array """ def rand(eps): """Return random number in interval [-eps, eps].""" return rnd.random() * 2 * eps - eps def g_func(z): """The sigmoid (logistic) function.""" return 1. / (1. + np.exp(-z)) def h_func(thetas, x): """The model function.""" a = np.array([[1.] + list(x)]).T # Initialize a for l in range(1, len(thetas) + 1): # Forward propagation a = np.vstack((np.array([[1.]]), g_func(thetas[l - 1].T.dot(a)))) return a[1:] def llog(val): "The limited logarithm." e = 1e-10 return np.log(np.clip(val, e, 1. - e)) def unroll(thetas): """Unrolls a list of thetas into vector.""" sd = [m.shape for m in thetas] # Keep the shape data thetas = np.concatenate([theta.reshape(np.prod(theta.shape))for theta in thetas]) return thetas, sd def roll(thetas, sd): """Rolls a vector of thetas back into list.""" thetas = np.split(thetas, [sum(np.prod(s) for s in sd[:i]) + np.prod(sd[i]) for i in range(len(sd) - 1)]) return [np.reshape(theta, sd[i]) for i, theta in enumerate(thetas)] def cost(thetas, X, y, sd, S, lambda_): """The cost function of the neural network.""" thetas = roll(thetas, sd) m, _ = X.shape L = len(S) reg_factor = (lambda_ / float(2 * m)) * sum(sum(sum(thetas[l][1:, :]**2)) for l in range(L - 1)) cost = (-1. / float(m)) * sum(sum(self._classes_map[y[s]] * llog(h_func(thetas, X[s])) + (1. - self._classes_map[y[s]]) * llog(1. - h_func(thetas, X[s]))) for s in range(m)) + reg_factor if self._verbose: print "Current value of cost func.: " + str(cost[0]) return cost[0] def grad(thetas, X, y, sd, S, lambda_): """The gradient (derivate) function which includes the back propagation algorithm.""" thetas = roll(thetas, sd) m, _ = X.shape L = len(S) d = [np.array([[0. for j in range(S[l + 1])] for i in range(S[l] + 1)]) for l in range(L - 1)] # Initialize the delta matrix for s in range(m): a = [np.array([[1.] + list(X[s])]).T] # Initialize a (only a, d & theta matrices have 1 more element in columns, biases) for l in range(1, L): # Forward propagation a.append(np.vstack((np.array([[1.]]), g_func(thetas[l - 1].T.dot(a[l - 1]))))) # TODO # Softmax: treat last a column differently #ez = np.exp(thetas[L - 2].T.dot(a[L - 2])) #sez = sum(ez) #a.append(np.vstack((np.array([[1]]), ez / sez))) deltas = [None for l in range(L - 1)] + [a[-1][1:] - self._classes_map[y[s]]] for l in range(L - 2, 0, -1): # Backward propagation deltas[l] = (thetas[l].dot(deltas[l + 1]) * (a[l] * (1. - a[l])))[1:] for l in range(L - 1): d[l] = d[l] + a[l].dot(deltas[l + 1].T) D = [(1. / float(m)) * d[l] + lambda_ * thetas[l] for l in range(L - 1)] D = [Di - lambda_ * np.vstack((thetas[l][0], np.zeros((Di.shape[0] - 1, Di.shape[1])))) for l, Di in enumerate(D)] # Where i = 0, don't use regularization D, _ = unroll(D) return D def gradApprox(thetas, X, y, sd, S, lambda_): """Approximate the gradient of the cost function (only used for debugging, not in final version).""" eps = 1e-14 return (grad(thetas + eps, X, y, sd, S, lambda_) - grad(thetas - eps, X, y, sd, S, lambda_)) / float(2 * eps) # Set the random seed rnd.seed(self._seed) # Initialize the final layer of neural net (outputs) self._classes = list(set(y)) for i, cl in enumerate(self._classes): self._classes_map[cl] = np.zeros((len(self._classes), 1)) self._classes_map[cl][i] = 1. 
S = [len(X[0])] + self._hl + [len(self._classes)] # Complete information about levels L = len(S) thetas0 = [np.array([[rand(self._eps) for j in range(S[l + 1])] for i in range(S[l] + 1)]) for l in range(L - 1)] # Initialize the thetas matrix thetas0, sd = unroll(thetas0) #return grad(thetas0, X, y, sd, S, self._lambda), gradApprox(thetas0, X, y, sd, S, self._lambda) # For testing # The L-BFGS-B bounds parameter is redefined: input is (lower_bound, upper_bound) instead of array of bounds for each theta parameter if self._opt_args != None and "bounds" in self._opt_args: bounds = [self._opt_args["bounds"] for i in range(len(thetas0))] self._opt_args["bounds"] = bounds self._thetas, self._cost, _ = scp.fmin_l_bfgs_b(cost, thetas0, grad, args = (X, y, sd, S, self._lambda), **self._opt_args) self._thetas = roll(self._thetas, sd) self._cost = float(self._cost) self._can_classify = True
def binary_debug(svm, data,
                 l2_regularization=1e-3,
                 dtype='float64',
                 cost_fn='L2Huber',
                 bfgs_factr=1e11,  # 1e7 for moderate tolerance, 1e12 for low
                 bfgs_maxfun=1000,
                 decisions=None):
    n_features, = svm.weights.shape
    X, y = data
    assert set(y) == set([-1, 1])

    _X = theano.shared(X.astype(dtype), allow_downcast=True, borrow=True)
    _yvecs = theano.shared(y.astype(dtype), allow_downcast=True, borrow=True)

    sgd_params = tensor.vector(dtype=dtype)
    sgd_weights = sgd_params[:n_features]
    sgd_bias = sgd_params[n_features]

    margin = _yvecs * (tensor.dot(_X, sgd_weights)
                       # + sgd_bias
                       )

    # XXX REFACTOR
    if cost_fn == 'L2Half':
        losses = tensor.maximum(0, 1 - margin) ** 2
    elif cost_fn == 'L2Huber':
        # "Huber-ized" L2-SVM
        losses = tensor.switch(
            margin > -1,
            # -- smooth part
            tensor.maximum(0, 1 - margin) ** 2,
            # -- straight part
            -4 * margin)
    elif cost_fn == 'Hinge':
        losses = tensor.maximum(0, 1 - margin)
    else:
        raise ValueError('invalid cost-fn', cost_fn)

    l2_cost = .5 * l2_regularization * tensor.dot(sgd_weights, sgd_weights)
    cost = losses.mean() + l2_cost + sgd_bias ** 2
    dcost_dparams = tensor.grad(cost, sgd_params)

    _f_df = theano.function([sgd_params], [cost, dcost_dparams])

    def flatten_svm(obj):
        # Note this is different from multi-class case because bias is scalar
        return np.concatenate([obj.weights.flatten(), [obj.bias]])

    def f(p):
        c, d = _f_df(p.astype(dtype))
        return c.astype('float64'), d.astype('float64')

    params = np.zeros(n_features + 1)
    params[:n_features] = svm.weights
    params[n_features] = svm.bias

    best, bestval, info_dct = fmin_l_bfgs_b(f, params,
                                            iprint=1,
                                            factr=1e-5,
                                            maxfun=bfgs_maxfun,
                                            m=50,
                                            pgtol=1e-5,
                                            )

    best_svm = copy.deepcopy(svm)
    best_svm.weights = np.array(best[:n_features], dtype=dtype)
    best_svm.bias = float(best[n_features])

    # why ???
    _X.set_value(np.ones((2, 2), dtype=dtype))
    _yvecs.set_value(np.ones(2, dtype=dtype))
    return best_svm
import openbabel
import numpy
from scipy.optimize.lbfgsb import fmin_l_bfgs_b


def f(x):
    return x[0]**2 + x[1]**2


def g(x):
    return numpy.array([2*x[0], 2*x[1]])


x0 = numpy.array([3, 1])
opt, energy, dict = fmin_l_bfgs_b(f, x0, fprime=g)
print opt
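# A variant of the quadratic example above, added for illustration (not part of the
# original snippet): the same objective minimised without an analytic gradient, using
# approx_grad and box constraints, which mirrors how several snippets in this
# collection call fmin_l_bfgs_b. The bound values are illustrative assumptions only.
import numpy
from scipy.optimize.lbfgsb import fmin_l_bfgs_b


def f(x):
    return x[0]**2 + x[1]**2


x0 = numpy.array([3.0, 1.0])
xopt, fval, info = fmin_l_bfgs_b(f, x0, approx_grad=True,
                                 bounds=[(1.0, 5.0), (-2.0, 2.0)])
print(xopt, fval, info['warnflag'])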
def optimise_lbfgs(self, start):
    print
    print "***** LBFGS OPTIMISATION *****"
    x, f, d = fmin_l_bfgs_b(self.objective, start, fprime=self.grad,
                            pgtol=1e-09, iprint=0)
    return x
def smoothn(y,nS0=10,axis=None,smoothOrder=2.0,sd=None,verbose=False,\ s0=None,z0=None,isrobust=False,W=None,s=None,MaxIter=100,TolZ=1e-3,weightstr='bisquare'): ''' function [z,s,exitflag,Wtot] = smoothn(varargin) SMOOTHN Robust spline smoothing for 1-D to N-D data. SMOOTHN provides a fast, automatized and robust discretized smoothing spline for data of any dimension. Z = SMOOTHN(Y) automatically smoothes the uniformly-sampled array Y. Y can be any N-D noisy array (time series, images, 3D data,...). Non finite data (NaN or Inf) are treated as missing values. Z = SMOOTHN(Y,S) smoothes the array Y using the smoothing parameter S. S must be a real positive scalar. The larger S is, the smoother the output will be. If the smoothing parameter S is omitted (see previous option) or empty (i.e. S = []), it is automatically determined using the generalized cross-validation (GCV) method. Z = SMOOTHN(Y,W) or Z = SMOOTHN(Y,W,S) specifies a weighting array W of real positive values, that must have the same size as Y. Note that a nil weight corresponds to a missing value. Robust smoothing ---------------- Z = SMOOTHN(...,'robust') carries out a robust smoothing that minimizes the influence of outlying data. [Z,S] = SMOOTHN(...) also returns the calculated value for S so that you can fine-tune the smoothing subsequently if needed. An iteration process is used in the presence of weighted and/or missing values. Z = SMOOTHN(...,OPTION_NAME,OPTION_VALUE) smoothes with the termination parameters specified by OPTION_NAME and OPTION_VALUE. They can contain the following criteria: ----------------- TolZ: Termination tolerance on Z (default = 1e-3) TolZ must be in ]0,1[ MaxIter: Maximum number of iterations allowed (default = 100) Initial: Initial value for the iterative process (default = original data) ----------------- Syntax: [Z,...] = SMOOTHN(...,'MaxIter',500,'TolZ',1e-4,'Initial',Z0); [Z,S,EXITFLAG] = SMOOTHN(...) returns a boolean value EXITFLAG that describes the exit condition of SMOOTHN: 1 SMOOTHN converged. 0 Maximum number of iterations was reached. Class Support ------------- Input array can be numeric or logical. The returned array is of class double. Notes ----- The N-D (inverse) discrete cosine transform functions <a href="matlab:web('http://www.biomecardio.com/matlab/dctn.html')" >DCTN</a> and <a href="matlab:web('http://www.biomecardio.com/matlab/idctn.html')" >IDCTN</a> are required. To be made ---------- Estimate the confidence bands (see Wahba 1983, Nychka 1988). Reference --------- Garcia D, Robust smoothing of gridded data in one and higher dimensions with missing values. Computational Statistics & Data Analysis, 2010. 
<a href="matlab:web('http://www.biomecardio.com/pageshtm/publi/csda10.pdf')">PDF download</a> Examples: -------- # 1-D example x = linspace(0,100,2**8); y = cos(x/10)+(x/50)**2 + randn(size(x))/10; y[[70, 75, 80]] = [5.5, 5, 6]; z = smoothn(y); # Regular smoothing zr = smoothn(y,'robust'); # Robust smoothing subplot(121), plot(x,y,'r.',x,z,'k','LineWidth',2) axis square, title('Regular smoothing') subplot(122), plot(x,y,'r.',x,zr,'k','LineWidth',2) axis square, title('Robust smoothing') # 2-D example xp = 0:.02:1; [x,y] = meshgrid(xp); f = exp(x+y) + sin((x-2*y)*3); fn = f + randn(size(f))*0.5; fs = smoothn(fn); subplot(121), surf(xp,xp,fn), zlim([0 8]), axis square subplot(122), surf(xp,xp,fs), zlim([0 8]), axis square # 2-D example with missing data n = 256; y0 = peaks(n); y = y0 + rand(size(y0))*2; I = randperm(n^2); y(I(1:n^2*0.5)) = NaN; # lose 1/2 of data y(40:90,140:190) = NaN; # create a hole z = smoothn(y); # smooth data subplot(2,2,1:2), imagesc(y), axis equal off title('Noisy corrupt data') subplot(223), imagesc(z), axis equal off title('Recovered data ...') subplot(224), imagesc(y0), axis equal off title('... compared with original data') # 3-D example [x,y,z] = meshgrid(-2:.2:2); xslice = [-0.8,1]; yslice = 2; zslice = [-2,0]; vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06; subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic') title('Noisy data') v = smoothn(vn); subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic') title('Smoothed data') # Cardioid t = linspace(0,2*pi,1000); x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1; y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1; z = smoothn(complex(x,y)); plot(x,y,'r.',real(z),imag(z),'k','linewidth',2) axis equal tight # Cellular vortical flow [x,y] = meshgrid(linspace(0,1,24)); Vx = cos(2*pi*x+pi/2).*cos(2*pi*y); Vy = sin(2*pi*x+pi/2).*sin(2*pi*y); Vx = Vx + sqrt(0.05)*randn(24,24); # adding Gaussian noise Vy = Vy + sqrt(0.05)*randn(24,24); # adding Gaussian noise I = randperm(numel(Vx)); Vx(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers Vy(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers Vx(I(31:60)) = NaN; # missing values Vy(I(31:60)) = NaN; # missing values Vs = smoothn(complex(Vx,Vy),'robust'); # automatic smoothing subplot(121), quiver(x,y,Vx,Vy,2.5), axis square title('Noisy velocity field') subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square title('Smoothed velocity field') See also SMOOTH, SMOOTH3, DCTN, IDCTN. -- Damien Garcia -- 2009/03, revised 2010/11 Visit my <a href="matlab:web('http://www.biomecardio.com/matlab/smoothn.html')">website</a> for more details about SMOOTHN # Check input arguments error(nargchk(1,12,nargin)); z0=None,W=None,s=None,MaxIter=100,TolZ=1e-3 ''' if type(y) == ma.core.MaskedArray: # masked array is_masked = True mask = y.mask y = np.array(y) y[mask] = 0. if W != None: W = np.array(W) W[mask] = 0. if sd != None: W = np.array(1./sd**2) W[mask] = 0. sd = None y[mask] = np.nan if sd != None: sd_ = np.array(sd) mask = (sd > 0.) W = np.zeros_like(sd_) W[mask] = 1./sd_[mask]**2 sd = None if W != None: W = W/W.max() sizy = y.shape; # sort axis if axis == None: axis = tuple(np.arange(y.ndim)) noe = y.size # number of elements if noe<2: z = y exitflag = 0;Wtot=0 return z,s,exitflag,Wtot #--- # Smoothness parameter and weights #if s != None: # s = [] if W == None: W = ones(sizy); #if z0 == None: # z0 = y.copy() #--- # "Weighting function" criterion weightstr = weightstr.lower() #--- # Weights. Zero weights are assigned to not finite values (Inf or NaN), # (Inf/NaN values = missing data). 
IsFinite = np.array(isfinite(y)).astype(bool); nof = IsFinite.sum() # number of finite elements W = W*IsFinite; if any(W<0): error('smoothn:NegativeWeights',\ 'Weights must all be >=0') else: #W = W/np.max(W) pass #--- # Weighted or missing data? isweighted = any(W != 1); #--- # Robust smoothing? #isrobust #--- # Automatic smoothing? isauto = not s; #--- # DCTN and IDCTN are required try: from scipy.fftpack.realtransforms import dct,idct except: z = y exitflag = -1;Wtot=0 return z,s,exitflag,Wtot ## Creation of the Lambda tensor #--- # Lambda contains the eingenvalues of the difference matrix used in this # penalized least squares process. axis = tuple(np.array(axis).flatten()) d = y.ndim; Lambda = zeros(sizy); for i in axis: # create a 1 x d array (so e.g. [1,1] for a 2D case siz0 = ones((1,y.ndim))[0]; siz0[i] = sizy[i]; # cos(pi*(reshape(1:sizy(i),siz0)-1)/sizy(i))) # (arange(1,sizy[i]+1).reshape(siz0) - 1.)/sizy[i] Lambda = Lambda + (cos(pi*(arange(1,sizy[i]+1) - 1.)/sizy[i]).reshape(siz0)) #else: # Lambda = Lambda + siz0 Lambda = -2.*(len(axis)-Lambda); if not isauto: Gamma = 1./(1+(s*abs(Lambda))**smoothOrder); ## Upper and lower bound for the smoothness parameter # The average leverage (h) is by definition in [0 1]. Weak smoothing occurs # if h is close to 1, while over-smoothing appears when h is near 0. Upper # and lower bounds for h are given to avoid under- or over-smoothing. See # equation relating h to the smoothness parameter (Equation #12 in the # referenced CSDA paper). N = sum(array(sizy) != 1); # tensor rank of the y-array hMin = 1e-6; hMax = 0.99; # (h/n)**2 = (1 + a)/( 2 a) # a = 1/(2 (h/n)**2 -1) # where a = sqrt(1 + 16 s) # (a**2 -1)/16 try: sMinBnd = np.sqrt((((1+sqrt(1+8*hMax**(2./N)))/4./hMax**(2./N))**2-1)/16.); sMaxBnd = np.sqrt((((1+sqrt(1+8*hMin**(2./N)))/4./hMin**(2./N))**2-1)/16.); except: sMinBnd = None sMaxBnd = None ## Initialize before iterating #--- Wtot = W; #--- Initial conditions for z if isweighted: #--- With weighted/missing data # An initial guess is provided to ensure faster convergence. For that # purpose, a nearest neighbor interpolation followed by a coarse # smoothing are performed. #--- if z0 != None: # an initial guess (z0) has been provided z = z0; else: z = y #InitialGuess(y,IsFinite); z[~IsFinite] = 0. else: z = zeros(sizy); #--- z0 = z; y[~IsFinite] = 0; # arbitrary values for missing y-data #--- tol = 1.; RobustIterativeProcess = True; RobustStep = 1; nit = 0; #--- Error on p. Smoothness parameter s = 10^p errp = 0.1; #opt = optimset('TolX',errp); #--- Relaxation factor RF: to speedup convergence RF = 1 + 0.75*isweighted; # ?? ## Main iterative process #--- if isauto: try: xpost = array([(0.9*log10(sMinBnd) + log10(sMaxBnd)*0.1)]) except: array([100.]) else: xpost = array([log10(s)]) while RobustIterativeProcess: #--- "amount" of weights (see the function GCVscore) aow = sum(Wtot)/noe; # 0 < aow <= 1 #--- while tol>TolZ and nit<MaxIter: if verbose: print 'tol',tol,'nit',nit nit = nit+1; DCTy = dctND(Wtot*(y-z)+z,f=dct,axis=axis); if isauto and not remainder(log2(nit),1): #--- # The generalized cross-validation (GCV) method is used. # We seek the smoothing parameter s that minimizes the GCV # score i.e. s = Argmin(GCVscore). 
# Because this process is time-consuming, it is performed from # time to time (when nit is a power of 2) #--- # errp in here somewhere #xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,factr=10.,\ # approx_grad=True,bounds=[(log10(sMinBnd),log10(sMaxBnd))],\ # args=(Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe)) # if we have no clue what value of s to use, better span the # possible range to get a reasonable starting point ... # only need to do it once though. nS0 is teh number of samples used if not s0: ss = np.arange(nS0)*(1./(nS0-1.))*(log10(sMaxBnd)-log10(sMinBnd))+ log10(sMinBnd) g = np.zeros_like(ss) for i,p in enumerate(ss): g[i] = gcv(p,Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe,smoothOrder,axis) #print 10**p,g[i] xpost = [np.median(ss[g==g.min()])] #print '===============' #print nit,tol,g.min(),xpost[0],s #print '===============' else: xpost = [s0] xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,factr=10.,\ approx_grad=True,bounds=[(log10(sMinBnd),log10(sMaxBnd))],\ args=(Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe,smoothOrder,axis)) s = 10**xpost[0]; # update the value we use for the initial s estimate s0 = xpost[0] Gamma = 1./(1+(s*abs(Lambda))**smoothOrder); z = RF*dctND(Gamma*DCTy,f=idct,axis=axis) + (1-RF)*z; # if no weighted/missing data => tol=0 (no iteration) tol = isweighted*norm(z0-z)/norm(z); z0 = z; # re-initialization exitflag = nit<MaxIter; if isrobust: #-- Robust Smoothing: iteratively re-weighted process #--- average leverage h = sqrt(1+16.*s); h = sqrt(1+h)/sqrt(2)/h; h = h**N; #--- take robust weights into account Wtot = W*RobustWeights(y-z,IsFinite,h,weightstr); #--- re-initialize for another iterative weighted process isweighted = True; tol = 1; nit = 0; #--- RobustStep = RobustStep+1; RobustIterativeProcess = RobustStep<3; # 3 robust steps are enough. else: RobustIterativeProcess = False; # stop the whole process ## Warning messages #--- if isauto: if abs(log10(s)-log10(sMinBnd))<errp: warning('MATLAB:smoothn:SLowerBound',\ ['s = %.3f '%(s) + ': the lower bound for s '\ + 'has been reached. Put s as an input variable if required.']) elif abs(log10(s)-log10(sMaxBnd))<errp: warning('MATLAB:smoothn:SUpperBound',\ ['s = %.3f '%(s) + ': the upper bound for s '\ + 'has been reached. Put s as an input variable if required.']) #warning('MATLAB:smoothn:MaxIter',\ # ['Maximum number of iterations (%d'%(MaxIter) + ') has '\ # + 'been exceeded. Increase MaxIter option or decrease TolZ value.']) return z,s,exitflag,Wtot
def optimise_lbfgs(self, start):
    print
    print "***** LBFGS OPTIMISATION *****"
    x, f, d = fmin_l_bfgs_b(self.objective, start, fprime=self.grad,
                            pgtol=1e-09, iprint=0)
    return x
def train():
    np.random.seed(131742)
    # get sentences, trees and labels
    nExamples = -1
    print "loading data.."
    rnnData = RNNDataCorpus()
    rnnData.load_data(load_file=config.train_data, nExamples=nExamples)

    # initialize params
    print "initializing params"
    params = Params(data=rnnData, wordSize=50, rankWo=2)
    # define theta: one vector for all the parameters of the MV-RNN model: W, Wm, Wlabel, L, Lm
    n = params.wordSize; fanIn = params.fanIn; nWords = params.nWords
    nLabels = params.categories; rank = params.rankWo
    Wo = 0.01*np.random.randn(n + 2*n*rank, nWords)  # Lm, as in paper
    Wo[:n, :] = np.ones((n, Wo.shape[1]))            # Lm, as in paper
    Wcat = 0.005*np.random.randn(nLabels, fanIn)     # Wlabel, as in paper
    # Wv = 0.01*np.random.randn(n, nWords)
    # WO = 0.01*np.random.randn(n, 2*n)
    # W = 0.01*np.random.randn(n, 2*n+1)

    # load pre-trained weights here
    mats = sio.loadmat(config.pre_trained_weights)
    Wv = mats.get('Wv')  # L, as in paper
    W = mats.get('W')    # W, as in paper
    WO = mats.get('WO')  # Wm, as in paper

    sentencesIdx = np.arange(rnnData.ndoc())
    np.random.shuffle(sentencesIdx)
    nTrain = 4*len(sentencesIdx)/5
    trainSentIdx = sentencesIdx[0:nTrain]
    testSentIdx = sentencesIdx[nTrain:]
    batchSize = 5
    nBatches = len(trainSentIdx)/batchSize
    evalFreq = 5        # evaluate after every 5 minibatches
    nTestSentEval = 50  # number of test sentences to be evaluated

    rnnData_train = RNNDataCorpus()
    rnnData.copy_into_minibatch(rnnData_train, trainSentIdx)
    rnnData_test = RNNDataCorpus()
    if len(testSentIdx) > nTestSentEval:
        # np.random.shuffle(testSentIdx)  # choose random test examples
        thisTestSentIdx = testSentIdx[:nTestSentEval]
    else:
        thisTestSentIdx = testSentIdx
    rnnData.copy_into_minibatch(rnnData_test, thisTestSentIdx)

    # [Wv_test, Wo_test, _] = getRelevantWords(rnnData_test, Wv, Wo, params)
    [Wv_trainTest, Wo_trainTest, all_train_idx] = getRelevantWords(rnnData, Wv, Wo, params)  # sets nWords_reduced, returns new arrays
    theta = np.concatenate((W.flatten(), WO.flatten(), Wcat.flatten(),
                            Wv_trainTest.flatten(), Wo_trainTest.flatten()))

    # optimize
    print "starting training..."
    nIter = 100
    rnnData_minibatch = RNNDataCorpus()
    for i in range(nIter):
        # train in minibatches
        # ftrain = np.zeros(nBatches)
        # for ibatch in range(nBatches):
        #     set_minibatch(rnnData, rnnData_minibatch, ibatch, nBatches, trainSentIdx)
        #     print 'Iteration: ', i, ' minibatch: ', ibatch
        tunedTheta, fbatch_train, _ = lbfgsb.fmin_l_bfgs_b(
            func=costFn, x0=theta, fprime=None, args=(rnnData_train, params),
            approx_grad=0, bounds=None, m=5, factr=1e15, pgtol=1e-5,
            epsilon=1e-08, iprint=3, maxfun=1, disp=0)

        # map parameters back
        W[:, :], WO[:, :], Wcat[:, :], Wv_trainTest, Wo_trainTest = unroll_theta(tunedTheta, params)
        Wv[:, all_train_idx] = Wv_trainTest
        Wo[:, all_train_idx] = Wo_trainTest
        # ftrain[ibatch] = fbatch_train
        theta = tunedTheta  # for next iteration

        print "========================================"
        print "XXXXXXIteration ", i,
        print "Average cost: ", np.average(fbatch_train)
        evaluate(Wv, Wo, W, WO, Wcat, params, rnnData_test)
        print "========================================"

        # save weights
        save_dict = {'Wv': Wv, 'Wo': Wo, 'Wcat': Wcat, 'W': W, 'WO': WO}
        sio.savemat(config.saved_params_file + '_lbfgs_iter' + str(i), mdict=save_dict)
        print "saved tuned theta. "
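# The loop above leans on warm-starting: fmin_l_bfgs_b is capped at a handful of
# function evaluations per outer iteration (maxfun) and the returned theta is fed
# back in as x0. A minimal sketch of that pattern on a toy quadratic (not the
# author's costFn or data):
import numpy as np
from scipy.optimize import fmin_l_bfgs_b

A = np.diag([1.0, 10.0, 100.0])                   # ill-conditioned toy problem

def cost_and_grad(theta):
    return 0.5 * theta.dot(A).dot(theta), A.dot(theta)

theta = np.array([5.0, -3.0, 2.0])
for it in range(10):
    theta, f, info = fmin_l_bfgs_b(cost_and_grad, x0=theta, maxfun=3)
    print(it, f)                                  # cost keeps dropping across restarts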
def BlockedTheanoOVA(svm, data, l2_regularization=1e-3, dtype='float64',
                     GPU_blocksize=1000 * (1024 ** 2),  # bytes
                     verbose=False,
                     ):
    n_features, n_classes = svm.weights.shape
    _X = theano.shared(np.ones((2, 2), dtype=dtype), allow_downcast=True)
    _yvecs = theano.shared(np.ones((2, 2), dtype=dtype), allow_downcast=True)

    sgd_params = tensor.vector(dtype=dtype)
    flat_sgd_weights = sgd_params[:n_features * n_classes]
    sgd_weights = flat_sgd_weights.reshape((n_features, n_classes))
    sgd_bias = sgd_params[n_features * n_classes:]

    margin = _yvecs * (tensor.dot(_X, sgd_weights) + sgd_bias)
    losses = tensor.maximum(0, 1 - margin) ** 2
    l2_cost = .5 * l2_regularization * tensor.dot(
        flat_sgd_weights, flat_sgd_weights)
    cost = losses.mean(axis=0).sum() + l2_cost
    dcost_dparams = tensor.grad(cost, sgd_params)

    _f_df = theano.function([sgd_params], [cost, dcost_dparams])

    assert dtype == 'float32'
    sizeof_dtype = 4
    X, y = data
    yvecs = np.asarray(
        (y[:, None] == np.arange(n_classes)) * 2 - 1,
        dtype=dtype)

    X_blocks = np.ceil(X.size * sizeof_dtype / float(GPU_blocksize))
    examples_per_block = len(X) // X_blocks

    if verbose:
        print 'dividing into', X_blocks, 'blocks of', examples_per_block

    # -- create a dummy class because a nested function cannot modify
    #    params_mean in enclosing scope
    class Dummy(object):
        def __init__(self, collect_estimates):
            params = np.zeros(n_features * n_classes + n_classes)
            params[:n_features * n_classes] = svm.weights.flatten()
            params[n_features * n_classes:] = svm.bias
            self.params = params
            self.params_mean = params.copy().astype('float64')
            self.params_mean_i = 0
            self.collect_estimates = collect_estimates

        def update_mean(self, p):
            self.params_mean_i += 1
            alpha = 1.0 / self.params_mean_i
            self.params_mean *= 1 - alpha
            self.params_mean += alpha * p

        def __call__(self, p):
            if self.collect_estimates:
                self.update_mean(p)
            c, d = _f_df(p.astype(dtype))
            return c.astype('float64'), d.astype('float64')

    dummy = Dummy(X_blocks > 2)

    i = 0
    while i + examples_per_block <= len(X):
        if verbose:
            print 'training on examples', i, 'to', i + examples_per_block
        _X.set_value(X[i:i + examples_per_block], borrow=True)
        _yvecs.set_value(yvecs[i:i + examples_per_block], borrow=True)

        best, bestval, info_dct = fmin_l_bfgs_b(
            dummy,
            dummy.params_mean.copy(),
            iprint=1 if verbose else -1,
            factr=1e11,  # -- 1e12 for low acc, 1e7 for moderate
            maxfun=1000,
            )
        dummy.update_mean(best)
        i += examples_per_block

    params = dummy.params_mean
    rval = classifier_from_weights(
        weights=params[:n_classes * n_features].reshape(
            (n_features, n_classes)),
        bias=params[n_classes * n_features:])

    return rval
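# fmin_l_bfgs_b only needs a callable returning (cost, gradient) as float64, so a class
# instance works just as well as a function -- which is what the Dummy class above
# exploits to keep a running mean of the iterates. A hypothetical toy version:
import numpy as np
from scipy.optimize import fmin_l_bfgs_b

class RunningMeanObjective(object):
    def __init__(self, target):
        self.target = np.asarray(target, dtype='float64')
        self.mean = np.zeros_like(self.target)
        self.count = 0

    def update_mean(self, p):
        self.count += 1
        self.mean += (p - self.mean) / self.count

    def __call__(self, p):
        self.update_mean(p)
        diff = p - self.target
        return float(diff.dot(diff)), 2.0 * diff   # cost and gradient, both float64

obj = RunningMeanObjective([1.0, -2.0, 0.5])
best, bestval, info = fmin_l_bfgs_b(obj, np.zeros(3))
print(best, info['warnflag'])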
def learn(self, X, y):
    """
    Learn the model from the given data.

    :param X: the attribute data
    :type X: numpy.array
    :param y: the class variable data
    :type y: numpy.array
    """
    def rand(eps):
        """Return random number in interval [-eps, eps]."""
        return rnd.random() * 2 * eps - eps

    def g_func(z):
        """The sigmoid (logistic) function."""
        return 1. / (1. + np.exp(-z))

    def h_func(thetas, x):
        """The model function."""
        a = np.array([[1.] + list(x)]).T  # Initialize a
        for l in range(1, len(thetas) + 1):  # Forward propagation
            a = np.vstack(
                (np.array([[1.]]), g_func(thetas[l - 1].T.dot(a))))
        return a[1:]

    def llog(val):
        """The limited logarithm."""
        e = 1e-10
        return np.log(np.clip(val, e, 1. - e))

    def unroll(thetas):
        """Unrolls a list of thetas into a vector."""
        sd = [m.shape for m in thetas]  # Keep the shape data
        thetas = np.concatenate(
            [theta.reshape(np.prod(theta.shape)) for theta in thetas])
        return thetas, sd

    def roll(thetas, sd):
        """Rolls a vector of thetas back into a list."""
        thetas = np.split(thetas, [
            sum(np.prod(s) for s in sd[:i]) + np.prod(sd[i])
            for i in range(len(sd) - 1)
        ])
        return [np.reshape(theta, sd[i]) for i, theta in enumerate(thetas)]

    def cost(thetas, X, y, sd, S, lambda_):
        """The cost function of the neural network."""
        thetas = roll(thetas, sd)
        m, _ = X.shape
        L = len(S)
        reg_factor = (lambda_ / float(2 * m)) * sum(
            sum(sum(thetas[l][1:, :]**2)) for l in range(L - 1))
        cost = (-1. / float(m)) * sum(
            sum(self._classes_map[y[s]] * llog(h_func(thetas, X[s])) +
                (1. - self._classes_map[y[s]]) * llog(1. - h_func(thetas, X[s])))
            for s in range(m)) + reg_factor
        if self._verbose:
            print "Current value of cost func.: " + str(cost[0])
        return cost[0]

    def grad(thetas, X, y, sd, S, lambda_):
        """The gradient (derivative) function, which includes the back
        propagation algorithm."""
        thetas = roll(thetas, sd)
        m, _ = X.shape
        L = len(S)
        d = [np.array([[0. for j in range(S[l + 1])]
                       for i in range(S[l] + 1)])
             for l in range(L - 1)]  # Initialize the delta matrix
        for s in range(m):
            # Initialize a (only a, d & theta matrices have 1 more element in columns, biases)
            a = [np.array([[1.] + list(X[s])]).T]
            for l in range(1, L):  # Forward propagation
                a.append(
                    np.vstack((np.array([[1.]]),
                               g_func(thetas[l - 1].T.dot(a[l - 1])))))
            # TODO
            # Softmax: treat last a column differently
            #ez = np.exp(thetas[L - 2].T.dot(a[L - 2]))
            #sez = sum(ez)
            #a.append(np.vstack((np.array([[1]]), ez / sez)))
            deltas = [None for l in range(L - 1)] + [a[-1][1:] - self._classes_map[y[s]]]
            for l in range(L - 2, 0, -1):  # Backward propagation
                deltas[l] = (thetas[l].dot(deltas[l + 1]) * (a[l] * (1. - a[l])))[1:]
            for l in range(L - 1):
                d[l] = d[l] + a[l].dot(deltas[l + 1].T)
        D = [(1. / float(m)) * d[l] + lambda_ * thetas[l] for l in range(L - 1)]
        D = [Di - lambda_ * np.vstack(
                (thetas[l][0], np.zeros((Di.shape[0] - 1, Di.shape[1]))))
             for l, Di in enumerate(D)]  # Where i = 0, don't use regularization
        D, _ = unroll(D)
        return D

    def gradApprox(thetas, X, y, sd, S, lambda_):
        """Approximate the gradient of the cost function (only used for
        debugging, not in final version)."""
        eps = 1e-14
        return (grad(thetas + eps, X, y, sd, S, lambda_) -
                grad(thetas - eps, X, y, sd, S, lambda_)) / float(2 * eps)

    # Set the random seed
    rnd.seed(self._seed)

    # Initialize the final layer of neural net (outputs)
    self._classes = list(set(y))
    for i, cl in enumerate(self._classes):
        self._classes_map[cl] = np.zeros((len(self._classes), 1))
        self._classes_map[cl][i] = 1.

    S = [len(X[0])] + self._hl + [len(self._classes)]  # Complete information about levels
    L = len(S)
    thetas0 = [np.array([[rand(self._eps) for j in range(S[l + 1])]
                         for i in range(S[l] + 1)])
               for l in range(L - 1)]  # Initialize the thetas matrix
    thetas0, sd = unroll(thetas0)

    #return grad(thetas0, X, y, sd, S, self._lambda), gradApprox(thetas0, X, y, sd, S, self._lambda)  # For testing

    # The L-BFGS-B bounds parameter is redefined: input is (lower_bound, upper_bound)
    # instead of an array of bounds for each theta parameter
    if self._opt_args is not None and "bounds" in self._opt_args:
        bounds = [self._opt_args["bounds"] for i in range(len(thetas0))]
        self._opt_args["bounds"] = bounds

    self._thetas, self._cost, _ = scp.fmin_l_bfgs_b(
        cost, thetas0, grad, args=(X, y, sd, S, self._lambda),
        **self._opt_args)
    self._thetas = roll(self._thetas, sd)
    self._cost = float(self._cost)
    self._can_classify = True
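# The unroll/roll helpers above exist because fmin_l_bfgs_b optimizes a single flat
# vector. A minimal, self-contained sketch of the same flatten/optimize/restore pattern
# on a toy two-matrix least-squares problem (not the author's network or bias handling):
import numpy as np
from scipy.optimize import fmin_l_bfgs_b

shapes = [(3, 2), (2, 1)]

def unroll(mats):
    return np.concatenate([m.ravel() for m in mats])

def roll(vec, shapes):
    out, i = [], 0
    for s in shapes:
        n = int(np.prod(s))
        out.append(vec[i:i + n].reshape(s))
        i += n
    return out

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
Y = X.dot(rng.randn(3, 2)).dot(rng.randn(2, 1))    # targets from a random composite map

def cost_grad(vec):
    W1, W2 = roll(vec, shapes)
    R = X.dot(W1).dot(W2) - Y
    g1 = X.T.dot(R).dot(W2.T)                      # d cost / d W1
    g2 = X.dot(W1).T.dot(R)                        # d cost / d W2
    return 0.5 * np.sum(R ** 2), unroll([g1, g2])

vec0 = 0.1 * rng.randn(int(sum(np.prod(s) for s in shapes)))
vec, f, info = fmin_l_bfgs_b(cost_grad, vec0)
W1_opt, W2_opt = roll(vec, shapes)                 # restore the original shapes
print(f, info['warnflag'])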
def SubsampledTheanoOVA(svm, data, l2_regularization=1e-3, dtype='float64',
                        feature_bytes=1000 * (1024 ** 2),  # bytes
                        verbose=False,
                        rng=None,
                        n_runs=None,   # None -> smallest int that uses all data
                        n_keep=None,   # None -> X.shape[1] / n_runs
                        cost_fn='L2Huber',
                        bfgs_factr=1e11,  # 1e7 for moderate tolerance, 1e12 for low
                        bfgs_maxfun=1000,
                        decisions=None,
                        decision_hack=None,
                        ):
    # I tried to change the problem to work with reduced regularization
    # or a smaller minimal margin (e.g. < 1) to compensate for the missing
    # features, but nothing really worked.
    #
    # I think the better thing would be to do boosting, in just the way we
    # did in the eccv12 project (see e.g. MarginASGD)
    n_features, n_classes = svm.weights.shape
    X, y = data

    if verbose:
        print 'Training svm on design matrix of size', X.shape
        print ' with', n_classes, 'features'

    if n_keep is None:
        if n_runs is None:
            sizeof_dtype = {'float32': 4, 'float64': 8}[dtype]
            Xbytes = X.size * sizeof_dtype
            keep_ratio = float(feature_bytes) / Xbytes
            n_runs = int(np.ceil(1. / keep_ratio))
        n_keep = int(np.ceil(X.shape[1] / float(n_runs)))
    else:
        if n_runs is None:
            n_runs = int(np.ceil(X.shape[1] / float(n_keep)))

    _X = theano.shared(np.ones((2, 2), dtype=dtype), allow_downcast=True)
    _yvecs = theano.shared(np.ones((2, 2), dtype=dtype), allow_downcast=True)
    if decisions is None:
        _decisions = theano.shared(
            np.zeros((len(y), n_classes), dtype=dtype),
            allow_downcast=True)
    else:
        decisions = np.asarray(decisions).astype(dtype)
        # -- N.B. for multi-class the decisions would be an examples x classes
        #    matrix
        if decisions.shape != (len(y), n_classes):
            raise ValueError('decisions have wrong shape', decisions.shape)
        _decisions = theano.shared(decisions)
        del decisions

    sgd_params = tensor.vector(dtype=dtype)
    s_n_use = tensor.lscalar()

    flat_sgd_weights = sgd_params[:s_n_use * n_classes]
    sgd_weights = flat_sgd_weights.reshape((s_n_use, n_classes))
    sgd_bias = sgd_params[s_n_use * n_classes:]

    margin = _yvecs * (tensor.dot(_X, sgd_weights) + sgd_bias + _decisions)
    if cost_fn == 'L2Half':
        losses = tensor.maximum(0, 1 - margin) ** 2
    elif cost_fn == 'L2Huber':
        # "Huber-ized" L2-SVM
        losses = tensor.switch(
            margin > -1,
            # -- smooth part
            tensor.maximum(0, 1 - margin) ** 2,
            # -- straight part
            -4 * margin)
    elif cost_fn == 'Hinge':
        losses = tensor.maximum(0, 1 - margin)
    else:
        raise ValueError('invalid cost-fn', cost_fn)
    l2_cost = .5 * l2_regularization * tensor.dot(
        flat_sgd_weights, flat_sgd_weights)
    cost = losses.mean(axis=0).sum() + l2_cost
    dcost_dparams = tensor.grad(cost, sgd_params)

    _f_df = theano.function([sgd_params, s_n_use], [cost, dcost_dparams])

    yvecs = np.asarray(
        (y[:, None] == np.arange(n_classes)) * 2 - 1,
        dtype=dtype)

    # TODO: reconsider how to use this function when doing partial fitting
    #_f_update_decisions = theano.function([sgd_params, s_n_use], [],
    #    updates={
    #        _decisions: (_decisions
    #            + tensor.dot(_X, sgd_weights) + sgd_bias),
    #        })

    def flatten_svm(obj):
        return np.concatenate([obj.weights.flatten(), obj.bias])

    if verbose:
        print 'keeping', n_keep, 'of', X.shape[1], 'features'

    if rng is None:
        rng = np.random.RandomState(123)
    all_feat_randomized = rng.permutation(X.shape[1])
    bests = []
    for ii in range(n_runs):
        use_features = all_feat_randomized[ii * n_keep: (ii + 1) * n_keep]
        assert len(use_features)
        n_use = len(use_features)

        def f(p):
            c, d = _f_df(p.astype(dtype), n_use)
            return c.astype('float64'), d.astype('float64')

        params = np.zeros(n_use * n_classes + n_classes)
        params[:n_use * n_classes] = svm.weights[use_features].flatten()
        params[n_use * n_classes:] = svm.bias

        _X.set_value(X[:, use_features], borrow=True)
        _yvecs.set_value(yvecs, borrow=True)

        best, bestval, info_dct = fmin_l_bfgs_b(
            f,
            params,
            iprint=1 if verbose else -1,
            factr=bfgs_factr,  # -- 1e12 for low acc, 1e7 for moderate
            maxfun=bfgs_maxfun,
            )

        best_svm = copy.deepcopy(svm)
        best_svm.weights[use_features] = best[:n_classes * n_use].reshape(
            (n_use, n_classes))
        best_svm.bias = best[n_classes * n_use:]
        bests.append(flatten_svm(best_svm))

    # sum instead of mean here, because each loop iter trains only a subset of
    # features. XXX: This assumes that those subsets are mutually exclusive
    best_params = np.sum(bests, axis=0)

    rval = copy.deepcopy(svm)
    rval.weights = best_params[:n_classes * n_features].reshape(
        (n_features, n_classes))
    rval.bias = best_params[n_classes * n_features:]

    # XXX: figure out why Theano may be not freeing this memory, why does
    #      writing little matrices here help?
    _X.set_value(np.ones((2, 2), dtype=dtype))
    _yvecs.set_value(np.ones((2, 2), dtype=dtype))
    _decisions.set_value(np.ones((2, 2), dtype=dtype))

    return rval
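# For reference, the 'L2Huber' branch above (the "Huber-ized" L2-SVM) re-expressed in
# plain NumPy: quadratic where margin > -1, linear (-4*margin) below, so the two pieces
# meet smoothly at margin = -1.
import numpy as np

def l2huber_loss(margin):
    margin = np.asarray(margin, dtype=float)
    return np.where(margin > -1,
                    np.maximum(0.0, 1.0 - margin) ** 2,
                    -4.0 * margin)

print(l2huber_loss([-3.0, -1.0, 0.0, 1.0, 2.0]))   # [12. 4. 1. 0. 0.]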
def BinarySubsampledTheanoOVA(svm, data, l2_regularization=1e-3,
                              dtype='float64',
                              feature_bytes=1000 * (1024 ** 2),  # bytes
                              verbose=False,
                              rng=None,
                              n_runs=None,  # None -> smallest int that uses all data
                              cost_fn='L2Huber',
                              bfgs_factr=1e11,  # 1e7 for moderate tolerance, 1e12 for low
                              bfgs_maxfun=1000,
                              decisions=None
                              ):
    n_features, = svm.weights.shape
    X, y = data

    # XXX REFACTOR
    if n_runs is None:
        sizeof_dtype = {'float32': 4, 'float64': 8}[dtype]
        Xbytes = X.size * sizeof_dtype
        keep_ratio = float(feature_bytes) / Xbytes
        n_runs = int(np.ceil(1. / keep_ratio))
        print 'BinarySubsampledTheanoOVA using n_runs =', n_runs
    n_keep = int(np.ceil(X.shape[1] / float(n_runs)))

    assert set(y) == set([-1, 1])

    _X = theano.shared(np.ones((2, 2), dtype=dtype),
                       allow_downcast=True, borrow=True)
    _yvecs = theano.shared(y.astype(dtype),
                           allow_downcast=True, borrow=True)
    if decisions:
        decisions = np.asarray(decisions).astype(dtype)
        # -- N.B. for multi-class the decisions would be an examples x classes
        #    matrix
        if decisions.shape != y.shape:
            raise ValueError('decisions have wrong shape', decisions.shape)
        _decisions = theano.shared(decisions)
        del decisions
    else:
        _decisions = theano.shared(y.astype(dtype) * 0, allow_downcast=True)

    sgd_params = tensor.vector(dtype=dtype)
    s_n_use = tensor.lscalar()

    sgd_weights = sgd_params[:s_n_use]
    sgd_bias = sgd_params[s_n_use]

    margin = _yvecs * (tensor.dot(_X, sgd_weights) + sgd_bias + _decisions)

    # XXX REFACTOR
    if cost_fn == 'L2Half':
        losses = tensor.maximum(0, 1 - margin) ** 2
    elif cost_fn == 'L2Huber':
        # "Huber-ized" L2-SVM
        losses = tensor.switch(
            margin > -1,
            # -- smooth part
            tensor.maximum(0, 1 - margin) ** 2,
            # -- straight part
            -4 * margin)
    elif cost_fn == 'Hinge':
        losses = tensor.maximum(0, 1 - margin)
    else:
        raise ValueError('invalid cost-fn', cost_fn)

    l2_cost = .5 * l2_regularization * tensor.dot(
        sgd_weights, sgd_weights)
    cost = losses.mean() + l2_cost
    dcost_dparams = tensor.grad(cost, sgd_params)

    _f_df = theano.function([sgd_params, s_n_use], [cost, dcost_dparams])
    _f_update_decisions = theano.function([sgd_params, s_n_use], [],
        updates={
            _decisions: (
                tensor.dot(_X, sgd_weights) + sgd_bias + _decisions),
            })

    def flatten_svm(obj):
        # Note this is different from the multi-class case because bias is scalar
        return np.concatenate([obj.weights.flatten(), [obj.bias]])

    if verbose:
        print 'keeping', n_keep, 'of', X.shape[1], 'features, per round'
        print 'running for ', n_runs, 'rounds'

    if rng is None:
        rng = np.random.RandomState(123)
    all_feat_randomized = rng.permutation(X.shape[1])
    bests = []
    for ii in range(n_runs):
        use_features = all_feat_randomized[ii * n_keep: (ii + 1) * n_keep]
        assert len(use_features)
        n_use = len(use_features)

        def f(p):
            c, d = _f_df(p.astype(dtype), n_use)
            return c.astype('float64'), d.astype('float64')

        params = np.zeros(n_use + 1)
        params[:n_use] = svm.weights[use_features].flatten()
        params[n_use] = svm.bias

        _X.set_value(X[:, use_features], borrow=True)

        best, bestval, info_dct = fmin_l_bfgs_b(
            f,
            params,
            iprint=int(verbose) - 1,
            factr=bfgs_factr,
            maxfun=bfgs_maxfun,
            )

        best_svm = copy.deepcopy(svm)
        best_svm.weights[use_features] = np.array(best[:n_use], dtype=dtype)
        best_svm.bias = float(best[n_use])
        bests.append(flatten_svm(best_svm))

        _f_update_decisions(best.astype(dtype), n_use)
        margin_ii = _decisions.get_value() * _yvecs.get_value()
        print 'run %i: margin min:%f mean:%f max:%f' % (
            ii, np.min(margin_ii), np.mean(margin_ii), np.max(margin_ii))
        if 0:
            # XXX This is a hack that helps but it's basically wrong. The
            # correct thing to do would be to add two scalars to the
            # optimization: one scalar represents the total l2 norm of the
            # weight vector fit so far. The second scalar represents how much
            # to down-weight the total vector fit so far in response to the
            # utility of the current feature set. So this second scalar would
            # scale the vector of previous decisions, and the l2-cost would
            # always be the l2-cost of the entire vector so far.
            _decisions.set_value(
                _decisions.get_value() - np.min(margin_ii) * y)
        elif (ii < (n_runs - 1)) and (np.min(margin_ii) > .95):
            print 'Margin has been maximized after', ii, 'of', n_runs
            break

    # N.B. we might have used fewer than n_runs
    best_params = np.sum(bests, axis=0)
    best_params[n_features] /= len(bests)  # bias is estimated on each run

    rval = copy.deepcopy(svm)
    rval.weights = best_params[:n_features].astype(dtype)
    rval.bias = float(best_params[n_features])

    # XXX: figure out why Theano may be not freeing this memory, why does
    #      writing little matrices here help?
    _X.set_value(np.ones((2, 2), dtype=dtype))
    _yvecs.set_value(np.ones(2, dtype=dtype))
    return rval
def smoothn(y, nS0=10, axis=None, smoothOrder=2.0, sd=None, verbose=False,
            s0=None, z0=None, isrobust=False, W=None, s=None, MaxIter=100,
            TolZ=1e-3, weightstr='bisquare'):
  '''
  function [z,s,exitflag,Wtot] = smoothn(varargin)

  SMOOTHN Robust spline smoothing for 1-D to N-D data.
  SMOOTHN provides a fast, automatized and robust discretized smoothing
  spline for data of any dimension.

  Z = SMOOTHN(Y) automatically smoothes the uniformly-sampled array Y. Y
  can be any N-D noisy array (time series, images, 3D data,...). Non
  finite data (NaN or Inf) are treated as missing values.

  Z = SMOOTHN(Y,S) smoothes the array Y using the smoothing parameter S.
  S must be a real positive scalar. The larger S is, the smoother the
  output will be. If the smoothing parameter S is omitted (see previous
  option) or empty (i.e. S = []), it is automatically determined using
  the generalized cross-validation (GCV) method.

  Z = SMOOTHN(Y,W) or Z = SMOOTHN(Y,W,S) specifies a weighting array W of
  real positive values, that must have the same size as Y. Note that a
  nil weight corresponds to a missing value.

  Robust smoothing
  ----------------
  Z = SMOOTHN(...,'robust') carries out a robust smoothing that minimizes
  the influence of outlying data.

  [Z,S] = SMOOTHN(...) also returns the calculated value for S so that
  you can fine-tune the smoothing subsequently if needed.

  An iteration process is used in the presence of weighted and/or missing
  values. Z = SMOOTHN(...,OPTION_NAME,OPTION_VALUE) smoothes with the
  termination parameters specified by OPTION_NAME and OPTION_VALUE. They
  can contain the following criteria:
      -----------------
      TolZ:       Termination tolerance on Z (default = 1e-3)
                  TolZ must be in ]0,1[
      MaxIter:    Maximum number of iterations allowed (default = 100)
      Initial:    Initial value for the iterative process (default =
                  original data)
      -----------------
  Syntax: [Z,...] = SMOOTHN(...,'MaxIter',500,'TolZ',1e-4,'Initial',Z0);

  [Z,S,EXITFLAG] = SMOOTHN(...) returns a boolean value EXITFLAG that
  describes the exit condition of SMOOTHN:
      1       SMOOTHN converged.
      0       Maximum number of iterations was reached.

  Class Support
  -------------
  Input array can be numeric or logical. The returned array is of class
  double.

  Notes
  -----
  The N-D (inverse) discrete cosine transform functions
  <a href="matlab:web('http://www.biomecardio.com/matlab/dctn.html')">DCTN</a> and
  <a href="matlab:web('http://www.biomecardio.com/matlab/idctn.html')">IDCTN</a> are required.

  To be made
  ----------
  Estimate the confidence bands (see Wahba 1983, Nychka 1988).

  Reference
  ---------
  Garcia D, Robust smoothing of gridded data in one and higher dimensions
  with missing values. Computational Statistics & Data Analysis, 2010.
  <a href="matlab:web('http://www.biomecardio.com/pageshtm/publi/csda10.pdf')">PDF download</a>

  Examples:
  --------
  # 1-D example
  x = linspace(0,100,2**8);
  y = cos(x/10)+(x/50)**2 + randn(size(x))/10;
  y[[70, 75, 80]] = [5.5, 5, 6];
  z = smoothn(y); # Regular smoothing
  zr = smoothn(y,'robust'); # Robust smoothing
  subplot(121), plot(x,y,'r.',x,z,'k','LineWidth',2)
  axis square, title('Regular smoothing')
  subplot(122), plot(x,y,'r.',x,zr,'k','LineWidth',2)
  axis square, title('Robust smoothing')

  # 2-D example
  xp = 0:.02:1;
  [x,y] = meshgrid(xp);
  f = exp(x+y) + sin((x-2*y)*3);
  fn = f + randn(size(f))*0.5;
  fs = smoothn(fn);
  subplot(121), surf(xp,xp,fn), zlim([0 8]), axis square
  subplot(122), surf(xp,xp,fs), zlim([0 8]), axis square

  # 2-D example with missing data
  n = 256;
  y0 = peaks(n);
  y = y0 + rand(size(y0))*2;
  I = randperm(n^2);
  y(I(1:n^2*0.5)) = NaN; # lose 1/2 of data
  y(40:90,140:190) = NaN; # create a hole
  z = smoothn(y); # smooth data
  subplot(2,2,1:2), imagesc(y), axis equal off
  title('Noisy corrupt data')
  subplot(223), imagesc(z), axis equal off
  title('Recovered data ...')
  subplot(224), imagesc(y0), axis equal off
  title('... compared with original data')

  # 3-D example
  [x,y,z] = meshgrid(-2:.2:2);
  xslice = [-0.8,1]; yslice = 2; zslice = [-2,0];
  vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06;
  subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic')
  title('Noisy data')
  v = smoothn(vn);
  subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic')
  title('Smoothed data')

  # Cardioid
  t = linspace(0,2*pi,1000);
  x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1;
  y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1;
  z = smoothn(complex(x,y));
  plot(x,y,'r.',real(z),imag(z),'k','linewidth',2)
  axis equal tight

  # Cellular vortical flow
  [x,y] = meshgrid(linspace(0,1,24));
  Vx = cos(2*pi*x+pi/2).*cos(2*pi*y);
  Vy = sin(2*pi*x+pi/2).*sin(2*pi*y);
  Vx = Vx + sqrt(0.05)*randn(24,24); # adding Gaussian noise
  Vy = Vy + sqrt(0.05)*randn(24,24); # adding Gaussian noise
  I = randperm(numel(Vx));
  Vx(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
  Vy(I(1:30)) = (rand(30,1)-0.5)*5; # adding outliers
  Vx(I(31:60)) = NaN; # missing values
  Vy(I(31:60)) = NaN; # missing values
  Vs = smoothn(complex(Vx,Vy),'robust'); # automatic smoothing
  subplot(121), quiver(x,y,Vx,Vy,2.5), axis square
  title('Noisy velocity field')
  subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square
  title('Smoothed velocity field')

  See also SMOOTH, SMOOTH3, DCTN, IDCTN.

  -- Damien Garcia -- 2009/03, revised 2010/11
  Visit my <a href="matlab:web('http://www.biomecardio.com/matlab/smoothn.html')">website</a>
  for more details about SMOOTHN

  # Check input arguments
  error(nargchk(1,12,nargin));

  z0=None,W=None,s=None,MaxIter=100,TolZ=1e-3
  '''
  if type(y) == ma.core.MaskedArray:  # masked array
    is_masked = True
    mask = y.mask
    y = np.array(y)
    y[mask] = 0.
    if W is not None:
      W = np.array(W)
      W[mask] = 0.
    if sd is not None:
      W = np.array(1./sd**2)
      W[mask] = 0.
      sd = None
    y[mask] = np.nan

  if sd is not None:
    sd_ = np.array(sd)
    mask = (sd > 0.)
    W = np.zeros_like(sd_)
    W[mask] = 1./sd_[mask]**2
    sd = None

  if W is not None:
    W = W/W.max()

  sizy = y.shape

  # sort axis
  if axis is None:
    axis = tuple(np.arange(y.ndim))

  noe = y.size  # number of elements
  if noe < 2:
    z = y
    exitflag = 0
    Wtot = 0
    return z, s, exitflag, Wtot
  #---
  # Smoothness parameter and weights
  #if s != None:
  #  s = []
  if W is None:
    W = ones(sizy)

  #if z0 == None:
  #  z0 = y.copy()

  #---
  # "Weighting function" criterion
  weightstr = weightstr.lower()
  #---
  # Weights. Zero weights are assigned to not finite values (Inf or NaN),
  # (Inf/NaN values = missing data).
  IsFinite = np.array(isfinite(y)).astype(bool)
  nof = IsFinite.sum()  # number of finite elements
  W = W*IsFinite
  if any(W < 0):
    error('smoothn:NegativeWeights', 'Weights must all be >=0')
  else:
    #W = W/np.max(W)
    pass
  #---
  # Weighted or missing data?
  isweighted = any(W != 1)
  #---
  # Robust smoothing?
  #isrobust
  #---
  # Automatic smoothing?
  isauto = not s
  #---
  # DCTN and IDCTN are required
  try:
    from scipy.fftpack.realtransforms import dct, idct
  except:
    z = y
    exitflag = -1
    Wtot = 0
    return z, s, exitflag, Wtot

  ## Creation of the Lambda tensor
  #---
  # Lambda contains the eigenvalues of the difference matrix used in this
  # penalized least squares process.
  axis = tuple(np.array(axis).flatten())
  d = y.ndim
  Lambda = zeros(sizy)
  for i in axis:
    # create a 1 x d array (so e.g. [1,1] for a 2D case)
    siz0 = ones((1, y.ndim))[0]
    siz0[i] = sizy[i]
    # cos(pi*(reshape(1:sizy(i),siz0)-1)/sizy(i)))
    # (arange(1,sizy[i]+1).reshape(siz0) - 1.)/sizy[i]
    Lambda = Lambda + (cos(pi*(arange(1, sizy[i]+1) - 1.)/sizy[i]).reshape(siz0))
    #else:
    #  Lambda = Lambda + siz0
  Lambda = -2.*(len(axis) - Lambda)
  if not isauto:
    Gamma = 1./(1 + (s*abs(Lambda))**smoothOrder)

  ## Upper and lower bound for the smoothness parameter
  # The average leverage (h) is by definition in [0 1]. Weak smoothing occurs
  # if h is close to 1, while over-smoothing appears when h is near 0. Upper
  # and lower bounds for h are given to avoid under- or over-smoothing. See
  # equation relating h to the smoothness parameter (Equation #12 in the
  # referenced CSDA paper).
  N = sum(array(sizy) != 1)  # tensor rank of the y-array
  hMin = 1e-6
  hMax = 0.99
  # (h/n)**2 = (1 + a)/( 2 a)
  # a = 1/(2 (h/n)**2 -1)
  # where a = sqrt(1 + 16 s)
  # (a**2 -1)/16
  try:
    sMinBnd = np.sqrt((((1 + sqrt(1 + 8*hMax**(2./N)))/4./hMax**(2./N))**2 - 1)/16.)
    sMaxBnd = np.sqrt((((1 + sqrt(1 + 8*hMin**(2./N)))/4./hMin**(2./N))**2 - 1)/16.)
  except:
    sMinBnd = None
    sMaxBnd = None

  ## Initialize before iterating
  #---
  Wtot = W
  #--- Initial conditions for z
  if isweighted:
    #--- With weighted/missing data
    # An initial guess is provided to ensure faster convergence. For that
    # purpose, a nearest neighbor interpolation followed by a coarse
    # smoothing are performed.
    #---
    if z0 is not None:  # an initial guess (z0) has been provided
      z = z0
    else:
      z = y  #InitialGuess(y,IsFinite);
    z[~IsFinite] = 0.
  else:
    z = zeros(sizy)
  #---
  z0 = z
  y[~IsFinite] = 0  # arbitrary values for missing y-data
  #---
  tol = 1.
  RobustIterativeProcess = True
  RobustStep = 1
  nit = 0
  #--- Error on p. Smoothness parameter s = 10^p
  errp = 0.1
  #opt = optimset('TolX',errp);
  #--- Relaxation factor RF: to speedup convergence
  RF = 1 + 0.75*isweighted  # ??

  ## Main iterative process
  #---
  if isauto:
    try:
      xpost = array([(0.9*log10(sMinBnd) + log10(sMaxBnd)*0.1)])
    except:
      xpost = array([100.])
  else:
    xpost = array([log10(s)])
  while RobustIterativeProcess:
    #--- "amount" of weights (see the function GCVscore)
    aow = sum(Wtot)/noe  # 0 < aow <= 1
    #---
    while tol > TolZ and nit < MaxIter:
      if verbose:
        print('tol', tol, 'nit', nit)
      nit = nit + 1
      DCTy = dctND(Wtot*(y - z) + z, f=dct)
      if isauto and not remainder(log2(nit), 1):
        #---
        # The generalized cross-validation (GCV) method is used.
        # We seek the smoothing parameter s that minimizes the GCV
        # score i.e. s = Argmin(GCVscore).
        # Because this process is time-consuming, it is performed from
        # time to time (when nit is a power of 2)
        #---
        # errp in here somewhere
        #xpost,f,d = lbfgsb.fmin_l_bfgs_b(gcv,xpost,fprime=None,factr=10.,\
        #   approx_grad=True,bounds=[(log10(sMinBnd),log10(sMaxBnd))],\
        #   args=(Lambda,aow,DCTy,IsFinite,Wtot,y,nof,noe))

        # If we have no clue what value of s to use, better span the
        # possible range to get a reasonable starting point.
        # Only need to do it once though. nS0 is the number of samples used.
        if not s0:
          ss = np.arange(nS0)*(1./(nS0 - 1.))*(log10(sMaxBnd) - log10(sMinBnd)) + log10(sMinBnd)
          g = np.zeros_like(ss)
          for i, p in enumerate(ss):
            g[i] = gcv(p, Lambda, aow, DCTy, IsFinite, Wtot, y, nof, noe, smoothOrder)
            #print 10**p,g[i]
          xpost = [ss[g == g.min()]]
          #print '==============='
          #print nit,tol,g.min(),xpost[0],s
          #print '==============='
        else:
          xpost = [s0]
        xpost, f, d = lbfgsb.fmin_l_bfgs_b(gcv, xpost, fprime=None, factr=10.,
                                           approx_grad=True,
                                           bounds=[(log10(sMinBnd), log10(sMaxBnd))],
                                           args=(Lambda, aow, DCTy, IsFinite, Wtot, y, nof, noe, smoothOrder))
      s = 10**xpost[0]
      # update the value we use for the initial s estimate
      s0 = xpost[0]

      Gamma = 1./(1 + (s*abs(Lambda))**smoothOrder)

      z = RF*dctND(Gamma*DCTy, f=idct) + (1 - RF)*z
      # if no weighted/missing data => tol=0 (no iteration)
      tol = isweighted*norm(z0 - z)/norm(z)
      z0 = z  # re-initialization
    exitflag = nit < MaxIter

    if isrobust:
      #-- Robust Smoothing: iteratively re-weighted process
      #--- average leverage
      h = sqrt(1 + 16.*s)
      h = sqrt(1 + h)/sqrt(2)/h
      h = h**N
      #--- take robust weights into account
      Wtot = W*RobustWeights(y - z, IsFinite, h, weightstr)
      #--- re-initialize for another iterative weighted process
      isweighted = True
      tol = 1
      nit = 0
      #---
      RobustStep = RobustStep + 1
      RobustIterativeProcess = RobustStep < 3  # 3 robust steps are enough.
    else:
      RobustIterativeProcess = False  # stop the whole process

  ## Warning messages
  #---
  if isauto:
    if abs(log10(s) - log10(sMinBnd)) < errp:
      warning('MATLAB:smoothn:SLowerBound',
              ['s = %.3f ' % (s) + ': the lower bound for s '
               + 'has been reached. Put s as an input variable if required.'])
    elif abs(log10(s) - log10(sMaxBnd)) < errp:
      warning('MATLAB:smoothn:SUpperBound',
              ['s = %.3f ' % (s) + ': the upper bound for s '
               + 'has been reached. Put s as an input variable if required.'])
    #warning('MATLAB:smoothn:MaxIter',\
    #    ['Maximum number of iterations (%d'%(MaxIter) + ') has '\
    #    + 'been exceeded. Increase MaxIter option or decrease TolZ value.'])
  return z, s, exitflag, Wtot
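# A usage sketch for the function above, following the 1-D example from its own
# docstring (assumes smoothn and its helpers -- gcv, dctND, RobustWeights -- are
# importable from this module):
import numpy as np

x = np.linspace(0, 100, 2 ** 8)
y = np.cos(x / 10) + (x / 50) ** 2 + np.random.randn(x.size) / 10
y[[70, 75, 80]] = [5.5, 5, 6]                      # a few outliers
z, s, exitflag, Wtot = smoothn(y)                  # automatic (GCV-selected) smoothing
zr, sr, _, _ = smoothn(y, isrobust=True)           # robust variant down-weights outliers
print('chosen s:', s, 'robust s:', sr)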
import openbabel
import numpy
from scipy.optimize.lbfgsb import fmin_l_bfgs_b


def f(x):
    return x[0]**2 + x[1]**2


def g(x):
    return numpy.array([2 * x[0], 2 * x[1]])


x0 = numpy.array([3, 1])
opt, energy, dict = fmin_l_bfgs_b(f, x0, fprime=g)
print opt
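# The same toy problem with box constraints, to show the bounds argument
# (None leaves that side unbounded); the constrained minimum is at [1, 1]:
opt_c, energy_c, info_c = fmin_l_bfgs_b(f, x0, fprime=g, bounds=[(1, None), (1, None)])
print(opt_c)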