def run(self, function, beta):
    """Find the minimiser of the given function, starting at beta.

    Parameters
    ----------
    function : Function
        The function to minimise.

    beta : Numpy array
        The start vector.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    step = function.step(beta)

    betanew = betaold = beta

    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue) \
            or self.info_requested(Info.func_val):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        step = function.step(betanew)

        betaold = betanew
        betanew = betaold - step * function.grad(betaold)

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue) \
                or self.info_requested(Info.func_val):
            f.append(function.f(betanew))

        if maths.norm(betanew - betaold) < self.eps \
                and i >= self.min_iter:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            break

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue) \
            or self.info_requested(Info.func_val):
        self.info_set(Info.fvalue, f)
        self.info_set(Info.func_val, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return betanew
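
# Illustrative sketch (not part of the library): the bare update above,
# beta_new = beta_old - step * grad(beta_old), on a small quadratic
# f(b) = 0.5 * b'Ab - x'b, using the standard step 1/lambda_max(A). All
# names and numbers here are assumptions for the example only.
def _sketch_gradient_descent():
    import numpy as np

    A = np.array([[3.0, 1.0], [1.0, 2.0]])
    x = np.array([[1.0], [1.0]])
    step = 1.0 / np.linalg.eigvalsh(A).max()  # 1 / Lipschitz constant.

    b = np.zeros((2, 1))
    for _ in range(1000):
        b_old = b
        b = b - step * (A.dot(b) - x)  # Gradient of f at b.
        if np.linalg.norm(b - b_old) < 1e-8:  # Same criterion as run().
            break
    return b  # Approaches the solution of A b = x.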
def run(self, function, beta):
    """Find the minimiser of the given function, starting at beta.

    Parameters
    ----------
    function : Function
        The function to minimise.

    beta : Numpy array, p-by-1
        The start vector.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.func_val):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    aold = anew = 1.0
    thetaold = thetanew = beta
    betanew = betaold = beta

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        step = function.step(betanew)

        betaold = betanew
        thetaold = thetanew
        aold = anew

        thetanew = betaold - step * function.grad(betaold)
        anew = (1.0 + np.sqrt(4.0 * aold * aold + 1.0)) / 2.0
        betanew = thetanew \
            + (aold - 1.0) * (thetanew - thetaold) / anew

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.func_val):
            f.append(function.f(betanew))

        if maths.norm(betanew - betaold) < self.eps \
                and i >= self.min_iter:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            break

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.func_val):
        self.info_set(Info.func_val, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return betanew
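
# Illustrative sketch (not part of the library) of the acceleration above:
# theta takes the plain gradient step, the sequence a_k follows
# a_{k+1} = (1 + sqrt(4 a_k^2 + 1)) / 2, and beta extrapolates between
# consecutive theta iterates. Same toy quadratic as the previous sketch;
# all names are assumptions for the example only.
def _sketch_accelerated_gradient():
    import numpy as np

    A = np.array([[3.0, 1.0], [1.0, 2.0]])
    x = np.array([[1.0], [1.0]])
    step = 1.0 / np.linalg.eigvalsh(A).max()

    a = 1.0
    theta = beta = np.zeros((2, 1))
    for _ in range(1000):
        theta_old, a_old = theta, a
        theta = beta - step * (A.dot(beta) - x)  # Gradient step at beta.
        a = (1.0 + np.sqrt(4.0 * a_old * a_old + 1.0)) / 2.0
        beta = theta + (a_old - 1.0) * (theta - theta_old) / a
    return beta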
def run(self, functions, xy):
    """Find the minimum of two functions with associated proximal or
    projection operators.

    Parameters
    ----------
    functions : List or tuple with two Functions or a SplittableFunction
        The two functions.

    xy : List or tuple with two elements, numpy arrays
        The starting points for the minimisation.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    funcs = [functions.g, functions.h]

    x_new = xy[0]
    y_new = xy[1]
    z_new = x_new.copy()
    u_new = y_new.copy()

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        x_old = x_new
        z_old = z_new
        u_old = u_new

        if isinstance(funcs[0], properties.ProximalOperator):
            x_new = funcs[0].prox(z_old - u_old)
        else:
            x_new = funcs[0].proj(z_old - u_old)

        y_new = x_new  # TODO: Allow a linear operator here.

        if isinstance(funcs[1], properties.ProximalOperator):
            z_new = funcs[1].prox(y_new + u_old)
        else:
            z_new = funcs[1].proj(y_new + u_old)

        # The order here is important! Do not change!
        u_new = (y_new - z_new) + u_old

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue):
            fval = funcs[0].f(z_new) + funcs[1].f(z_new)
            f.append(fval)

        if not self.simulation:
            if i == 1:  # Absolute error on the first iteration ...
                if maths.norm(x_new - x_old) < self.eps \
                        and i >= self.min_iter:

                    if self.info_requested(Info.converged):
                        self.info_set(Info.converged, True)

                    break
            else:  # ... and relative error afterwards.
                if maths.norm(x_new - x_old) / maths.norm(x_old) < self.eps \
                        and i >= self.min_iter:

                    if self.info_requested(Info.converged):
                        self.info_set(Info.converged, True)

                    break

        # Update the penalty parameter, rho, dynamically.
        if self.mu > 1.0:
            r = x_new - z_new
            s = (z_new - z_old) * -self.rho
            norm_r = maths.norm(r)
            norm_s = maths.norm(s)

            if norm_r > self.mu * norm_s:
                self.rho *= self.tau
                u_new *= 1.0 / self.tau  # Rescale dual variable.
            elif norm_s > self.mu * norm_r:
                self.rho /= self.tau
                u_new *= self.tau  # Rescale dual variable.

            # Update the penalty parameter in the functions.
            functions.set_rho(self.rho)

    self.num_iter = i

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return z_new
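
# Illustrative sketch (not part of the library) of the update order above,
# on the toy problem minimise 0.5*||x - c||^2 + lam*||x||_1, whose solution
# is the soft-thresholding of c. The two prox maps are standard closed
# forms for rho = 1; c and lam are assumptions for the example only.
def _sketch_admm():
    import numpy as np

    c, lam = np.array([[2.0], [-0.3]]), 1.0
    prox_g = lambda v: (c + v) / 2.0  # Prox of 0.5*||. - c||^2.
    prox_h = lambda v: np.sign(v) * np.maximum(np.abs(v) - lam, 0.0)

    x = z = u = np.zeros((2, 1))
    for _ in range(100):
        x = prox_g(z - u)  # x-update.
        z = prox_h(x + u)  # z-update.
        u = u + (x - z)    # Dual update; same order as in run() above.
    return z  # ~ [[1.0], [0.0]], i.e. soft(c, lam).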
def run(self, function, beta):
    """Find the minimiser of the given function, starting at beta.

    Parameters
    ----------
    function : Function
        The function to minimise.

    beta : Numpy array
        The start vector.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    step = function.step(beta)

    betanew = betaold = beta

    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue) \
            or self.info_requested(Info.func_val):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        step = function.step(betanew)

        betaold = betanew
        betanew = function.prox(betaold - step * function.grad(betaold),
                                step,
                                eps=1.0 / (float(i) ** (2.0 + consts.FLOAT_EPSILON)),
                                max_iter=self.max_iter)

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue) \
                or self.info_requested(Info.func_val):
            f.append(function.f(betanew))

        if self.callback is not None:
            self.callback(locals())

        if (1.0 / step) * maths.norm(betanew - betaold) < self.eps \
                and i >= self.min_iter:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            break

    self.num_iter = i

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f)
    if self.info_requested(Info.func_val):
        self.info_set(Info.func_val, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return betanew
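
# Illustrative sketch (not part of the library) of the forward-backward
# step above for the Lasso, 0.5*||Xb - y||^2 + lam*||b||_1: a gradient
# step on the smooth part followed by the L1 prox (soft-thresholding),
# with the same (1/step)*||b_new - b_old|| stopping rule. Data and lam
# are random assumptions for the example only.
def _sketch_ista():
    import numpy as np

    rng = np.random.RandomState(0)
    X, y, lam = rng.randn(20, 5), rng.randn(20, 1), 0.5
    step = 1.0 / np.linalg.eigvalsh(X.T.dot(X)).max()

    b = np.zeros((5, 1))
    for _ in range(500):
        b_old = b
        v = b - step * X.T.dot(X.dot(b) - y)  # Forward (gradient) step.
        b = np.sign(v) * np.maximum(np.abs(v) - step * lam, 0.0)  # Prox.
        if (1.0 / step) * np.linalg.norm(b - b_old) < 1e-6:
            break
    return b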
def run(self, function, beta):

    # Copy the allowed info keys for FISTA.
    fista_info = list()
    for nfo in self.info_copy():
        if nfo in FISTA.INFO_PROVIDED:
            fista_info.append(nfo)

    # Create the inner algorithm.
    algorithm = FISTA(eps=self.eps,
                      max_iter=self.max_iter, min_iter=self.min_iter,
                      info=fista_info)

    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    if self.mu_start is None:
        mu = [function.estimate_mu(beta)]
    else:
        mu = [self.mu_start]

    function.set_mu(self.mu_min)
    tmin = function.step(beta)
    function.set_mu(mu[0])

    max_eps = function.eps_max(mu[0])

    G = min(max_eps, function.eps_opt(mu[0]))

    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue):
        f = []
    if self.info_requested(Info.gap):
        Gval = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    i = 0
    while True:
        stop = False

        tnew = function.step(beta)
        eps_plus = min(max_eps, function.eps_opt(mu[-1]))

        algorithm.set_params(step=tnew, eps=eps_plus,
                             max_iter=self.max_iter - self.num_iter,
                             conesta_stop=None)
        beta = algorithm.run(function, beta)

        self.num_iter += algorithm.num_iter

        if Info.time in algorithm.info:
            tval = algorithm.info_get(Info.time)
        if Info.fvalue in algorithm.info:
            fval = algorithm.info_get(Info.fvalue)

        self.mu_min = min(self.mu_min, mu[-1])
        tmin = min(tmin, tnew)
        old_mu = function.set_mu(self.mu_min)
        # Take one ISTA step for use in the stopping criterion.
        beta_tilde = function.prox(beta - tmin * function.grad(beta),
                                   tmin)
        function.set_mu(old_mu)

        if (1.0 / tmin) * maths.norm(beta - beta_tilde) < self.eps:
            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)
            stop = True

        if self.num_iter >= self.max_iter:
            stop = True

        if self.info_requested(Info.time):
            gap_time = utils.time_cpu()

        if self.dynamic:
            G_new = function.gap(beta, eps=eps_plus,
                                 max_iter=self.max_iter - self.num_iter)

            # TODO: Warn if G_new < 0.
            G_new = abs(G_new)  # Just in case ...

            if G_new < G:
                G = G_new
            else:
                G = self.tau * G

        else:  # Static.
            G = self.tau * G

        if self.info_requested(Info.time):
            gap_time = utils.time_cpu() - gap_time
            tval[-1] += gap_time
            t = t + tval
        if self.info_requested(Info.fvalue):
            f = f + fval
        if self.info_requested(Info.gap):
            Gval.append(G)

        if (G <= consts.TOLERANCE and mu[-1] <= consts.TOLERANCE) or stop:
            break

        mu_new = min(mu[-1], function.mu_opt(G))
        self.mu_min = min(self.mu_min, mu_new)
        if self.info_requested(Info.mu):
            mu = mu + [max(self.mu_min, mu_new)] * len(fval)
        else:
            mu.append(max(self.mu_min, mu_new))
        function.set_mu(mu_new)

        i = i + 1

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i + 1)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f)
    if self.info_requested(Info.gap):
        self.info_set(Info.gap, Gval)
    if self.info_requested(Info.mu):
        self.info_set(Info.mu, mu)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return beta
def run(self, X, y, beta=None):
    """Find the minimiser of the associated function, starting at beta.

    Parameters
    ----------
    X : Numpy array, shape n-by-p
        The matrix X with independent variables.

    y : Numpy array, shape n-by-1
        The response variable y.

    beta : Numpy array
        Optional starting point.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)
    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    n, p = X.shape

    if beta is None:
        beta = self.start_vector.get_weights(p)
    else:
        beta = beta.copy()

    function = functions.CombinedFunction()
    function.add_loss(functions.losses.LinearRegression(X, y,
                                                        mean=False))
    function.add_prox(penalties.L1(l=self.l))

    xTx = np.sum(X ** 2.0, axis=0)
    if self.mean:
        xTx *= 1.0 / float(n)

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        # The update has an error that propagates. This resets the
        # approximation. We may not need to do this at every iteration.
        y_Xbeta = y - np.dot(X, beta)

        betaold = beta.copy()
        for j in range(p):

            xj = X[:, [j]]
            betaj = beta[j, 0]

            if xTx[j] < consts.TOLERANCE:  # Avoid division-by-zero.
                bj = 0.0
            else:
                bj = np.dot(xj.T, y_Xbeta + xj * betaj)[0, 0]
                if self.mean:
                    bj /= float(n)

                if j < self.penalty_start:
                    bj = bj / xTx[j]
                else:
                    # Soft thresholding.
                    bj = np.sign(bj) \
                        * max(0.0, (abs(bj) - self.l) / xTx[j])

            y_Xbeta -= xj * (bj - betaj)  # Update X.beta.
            beta[j] = bj  # Save result.

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue):
            f_ = self._f(y_Xbeta, y, beta)
            f.append(f_)

        if maths.norm(beta - betaold) < self.eps \
                and i >= self.min_iter:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            break

    self.num_iter = i

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return beta
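
# Illustrative sketch (not part of the library) of one shooting sweep as
# used above: for each coordinate j, correlate column x_j with the
# residual plus its own contribution, soft-threshold, rescale by x_j'x_j,
# and keep the residual in sync. Data and lam are assumptions only.
def _sketch_coordinate_descent():
    import numpy as np

    rng = np.random.RandomState(0)
    X, y, lam = rng.randn(30, 4), rng.randn(30, 1), 0.1
    xTx = np.sum(X ** 2.0, axis=0)
    beta = np.zeros((4, 1))
    residual = y - X.dot(beta)

    for _ in range(100):
        beta_old = beta.copy()
        for j in range(X.shape[1]):
            xj = X[:, [j]]
            bj = np.dot(xj.T, residual + xj * beta[j, 0])[0, 0]
            bj = np.sign(bj) * max(0.0, (abs(bj) - lam) / xTx[j])
            residual -= xj * (bj - beta[j, 0])  # Keep residual in sync.
            beta[j, 0] = bj
        if np.linalg.norm(beta - beta_old) < 1e-8:
            break
    return beta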
def run(self, function, x, **kwargs):
    """Return the sample with the highest probability over the
    distribution defined by the given negative log-likelihood function.

    Note: The procedure returns the sample that had the maximum value of
    exp(-function.f(x)), which corresponds to the sample that had the
    minimum value of function.f(x), i.e. the function represents the
    negative log-likelihood of the distribution.

    Parameters
    ----------
    function : parsimony.functions.properties.Function
        The negative log-likelihood function to minimise.

    x : numpy.ndarray
        The initial point.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)
    if self.info_requested(Info.time):
        time = []
    if self.info_requested(Info.func_val):
        func_val = []
    if self.info_requested(Info.iterates):
        iterates = []

    # Keep track of the best value (MAP approximation).
    lnprob_max = -np.inf
    x_max = x

    if self.info_requested(Info.func_val):
        # Initial value from the start vector.
        f = [-function.f(x)]
        lnprob_max = f[-1]

    # Number of parameters.
    p = x.size

    for it in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        # Sample a proposal point, one component at a time.
        y = np.copy(x)
        for i in range(p):
            y = self.Q.random_sample(y, i, copy=False)
        lnprob_x = -function.f(y)

        if self.info_requested(Info.func_val):
            f.append(lnprob_x)

        x = y

        # Store the best sample visited.
        if lnprob_x > lnprob_max:
            lnprob_max = lnprob_x
            x_max = x

        if self.info_requested(Info.time):
            time.append(utils.time_cpu() - tm)

        if it % self.thinning == 0:
            if self.info_requested(Info.iterates):
                iterates.append(x)
            if self.info_requested(Info.func_val):
                func_val.append(lnprob_x)

        if self.callback is not None:
            self.callback(locals())

    self.num_iter = it

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, it)
    if self.info_requested(Info.time):
        self.info_set(Info.time, time)
    if self.info_requested(Info.func_val):
        self.info_set(Info.func_val, func_val)
    if self.info_requested(Info.iterates):
        self.info_set(Info.iterates, iterates)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return x_max
def run(self, function, beta):

    # Copy the allowed info keys for FISTA.
    fista_info = list()
    for nfo in self.info_copy():
        if nfo in FISTA.INFO_PROVIDED:
            fista_info.append(nfo)
    # CONESTA always asks for the gap.
    if Info.gap not in fista_info:
        fista_info.append(Info.gap)

    # Create the inner algorithm.
    algorithm = FISTA(use_gap=True, info=fista_info, eps=self.eps,
                      max_iter=self.max_iter, min_iter=self.min_iter)

    # Not ok until the end.
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    # Time the init computation (essentially the Lipschitz constant in
    # mu_opt).
    if self.info_requested(Info.time):
        init_time = utils.time_cpu()

    # Compute the current gap, the precision eps (the gap decreased by
    # tau) and mu.
    function.set_mu(consts.TOLERANCE)
    gap = function.gap(beta, eps=self.eps, max_iter=self.max_iter)
    eps = self.tau * abs(gap)
    # TODO: Warn if gap < -consts.TOLERANCE. See Special case 1.
    gM = function.eps_max(1.0)
    loop = True

    # Special case 1: The gap is very small, so the stopping criterion is
    # already satisfied ("- mu * gM" has been removed since mu == 0).
    if gap < self.eps:
        warnings.warn("Stopping criterion satisfied before the first "
                      "iteration. Either beta_start is already a solution "
                      "(given eps), or, if beta_start is null, the problem "
                      "might be over-penalised. In the latter case, try a "
                      "smaller penalty.")
        loop = False

    # Special case 2: The gap is infinite or NaN, so eps is not finite or
    # is NaN, which makes mu NaN etc. Force eps to a large value, to force
    # some FISTA iterations to get a better starting point.
    if not np.isfinite(eps):
        eps = self.eps_max

    if loop:  # mu is useless if loop is False.
        mu = function.mu_opt(eps)
        function.set_mu(mu)

    # Initialise info variables. Info variables have the suffix "_".
    if self.info_requested(Info.time):
        t_ = []
        init_time = utils.time_cpu() - init_time
    if self.info_requested(Info.fvalue) \
            or self.info_requested(Info.func_val):
        f_ = []
    if self.info_requested(Info.gap):
        gap_ = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)
    if self.info_requested(Info.mu):
        mu_ = []

    i = 0  # Iteration counter.
    while loop:
        converged = False

        # Current precision.
        eps_mu = max(eps, self.eps) - mu * gM

        # Set current parameters to algorithm.
        algorithm.set_params(eps=eps_mu,
                             max_iter=self.max_iter - self.num_iter)

        # Run FISTA.
        beta = algorithm.run(function, beta)

        # Update global iteration counter.
        self.num_iter += algorithm.num_iter

        # Get info from algorithm.
        if Info.time in algorithm.info and \
                self.info_requested(Info.time):
            t_ += algorithm.info_get(Info.time)
            if i == 0:  # Add init time to the first iteration.
                t_[0] += init_time
        if Info.func_val in algorithm.info and \
                self.info_requested(Info.func_val):
            f_ += algorithm.info_get(Info.func_val)
        elif Info.fvalue in algorithm.info and \
                self.info_requested(Info.fvalue):
            f_ += algorithm.info_get(Info.fvalue)
        if self.info_requested(Info.mu):
            mu_ += [mu] * algorithm.num_iter
        if self.info_requested(Info.gap):
            gap_ += algorithm.info_get(Info.gap)

        # Obtain the gap from the last FISTA run. May be small and
        # negative close to machine epsilon.
        gap_mu = abs(algorithm.info_get(Info.gap)[-1])
        # TODO: Warn if gap_mu < -consts.TOLERANCE.

        if not self.simulation:
            if gap_mu + mu * gM < self.eps:

                if self.info_requested(Info.converged):
                    self.info_set(Info.converged, True)

                converged = True

        if self.callback is not None:
            self.callback(locals())

        if self.info_requested(Info.verbose):
            print("CONESTA ite:%i, gap_mu: %g, eps: %g, mu: %g, "
                  "eps_mu: %g" % (i, gap_mu, eps, mu, eps_mu))

        # Stopping criteria.
        if (converged or self.num_iter >= self.max_iter) \
                and self.num_iter >= self.min_iter:
            break

        # Update the precision eps.
        eps = max(self.eps, self.tau * (gap_mu + mu * gM))

        # Compute and update mu.
        mu = min(function.mu_opt(eps), mu)
        function.set_mu(mu)

        i = i + 1

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, self.num_iter)
    if self.info_requested(Info.continuations):
        self.info_set(Info.continuations, i + 1)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t_)
    if self.info_requested(Info.func_val):
        self.info_set(Info.func_val, f_)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f_)
    if self.info_requested(Info.gap):
        self.info_set(Info.gap, gap_)
    if self.info_requested(Info.mu):
        self.info_set(Info.mu, mu_)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return beta
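
# Schematic sketch (not the real API) of the continuation principle above:
# solve the smoothed problem to a loose precision, then geometrically
# tighten eps by tau and re-smooth. The rule mu = eps/2 below is a
# stand-in for function.mu_opt(eps); every number here is an assumption
# for the example only.
def _sketch_continuation_schedule(tau=0.5, eps0=1.0, eps_target=1e-6):
    eps, mu = eps0, 0.5 * eps0
    schedule = []
    while eps > eps_target:
        schedule.append((eps, mu))  # One inner FISTA run per entry.
        eps = tau * eps             # Tighten the precision.
        mu = min(0.5 * eps, mu)     # Stand-in for function.mu_opt(eps).
    return schedule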
def run(self, X, y, beta=None):
    """Find the minimiser of the associated function, starting at beta.

    Parameters
    ----------
    X : Numpy array, shape n-by-p
        The matrix X with independent variables.

    y : Numpy array, shape n-by-1
        The response variable y.

    beta : Numpy array
        Optional starting point.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)
    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    n, p = X.shape

    if beta is None:
        beta = self.start_vector.get_weights(p)
    else:
        beta = beta.copy()

    xTx = np.sum(X ** 2.0, axis=0)
    if self.mean:
        xTx *= 1.0 / float(n)

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        # The update has an error that propagates. This resets the
        # approximation. We may not need to do this at every iteration.
        Xbeta_y = np.dot(X, beta) - y

        betaold = beta.copy()
        for j in range(p):

            xj = X[:, [j]]
            betaj = beta[j]

            # Solve for beta[j].
            if xTx[j] < consts.TOLERANCE:  # Avoid division-by-zero.
                bj = 0.0
            else:
                S0 = np.dot(xj.T, Xbeta_y - xj * betaj)[0, 0]
                if self.mean:
                    S0 /= float(n)

                if j < self.penalty_start:  # Intercept.
                    bj = -S0 / xTx[j]
                else:
                    if S0 > self.l:
                        bj = (self.l - S0) / xTx[j]
                    elif S0 < -self.l:
                        bj = (-self.l - S0) / xTx[j]
                    else:
                        bj = 0.0

            Xbeta_y += xj * (bj - betaj)  # Update X.beta.
            beta[j] = bj  # Save result.

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue):
            f_ = self._f(Xbeta_y, y, beta)
            f.append(f_)

        if maths.norm(beta - betaold) < self.eps \
                and i >= self.min_iter:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            break

    self.num_iter = i

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return beta
def run(self, function, x, **kwargs):
    """Return the sample with the highest probability over the
    distribution defined by the given negative log-likelihood function.

    Note: The procedure returns the sample that had the maximum value of
    exp(-function.f(x)), which corresponds to the sample that had the
    minimum value of function.f(x), i.e. the function represents the
    negative log-likelihood of the distribution.

    Parameters
    ----------
    function : parsimony.functions.properties.Function
        The negative log-likelihood function to minimise.

    x : numpy.ndarray, shape (p, 1) or (p, num_walkers)
        The initial point.
    """
    if x.ndim > 2:
        raise ValueError("The starting points must be a numpy array of "
                         "shape (p, 1) or (p, num_walkers).")
    if x.ndim < 2:  # Make x a column vector.
        x = np.atleast_2d(x)
        if x.shape[0] < x.shape[1]:
            x = x.T

    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)
    if self.info_requested(Info.time):
        time = []

    p = x.shape[0]  # Dimension of the parameter space.
    num_walkers = max(p + 1, self.num_walkers)

    if self.info_requested(Info.func_val):
        func_val = []
        for i in range(num_walkers):
            func_val.append([])
    if self.info_requested(Info.iterates):
        iterates = []
        for i in range(num_walkers):
            iterates.append([])

    # For the acceptance rate.
    accepted = [0] * num_walkers

    # Initial values from the start vectors.
    lnprob_x = [0] * num_walkers
    lnprob_y = [0] * num_walkers
    if x.shape[1] < num_walkers:
        X = np.zeros((p, num_walkers))
        X[:, :x.shape[1]] = x
        for i in range(num_walkers - x.shape[1]):
            if x.shape[1] > 1:
                rnd_col = np.random.randint(x.shape[1])
                x_ = x[:, [rnd_col]]
            else:
                x_ = x
            X[:, [x.shape[1] + i]] = self.Q.random_sample(x_)
    elif x.shape[1] == num_walkers:
        X = x
    else:
        raise ValueError("The number of parameters is greater than the "
                         "number of walkers. This should not be able to "
                         "happen here. Please report this error so that "
                         "we can fix it!")

    for i in range(num_walkers):
        lnprob_x[i] = -function.f(X[:, [i]])

    # Keep track of the best value per walker (sort of like MAP
    # approximations).
    lnprob_max = [-np.inf] * num_walkers
    X_max = X.copy()

    for it in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        # Select another walker randomly for each walker.
        walker_idx = np.mod(np.arange(num_walkers)
                            + np.floor(self.random_state.rand()
                                       * (num_walkers - 1)) + 1,
                            num_walkers).astype(int)

        # Sample the stretch move sizes.
        z = ((self.step_size - 1.0)
             * self.random_state.rand(num_walkers) + 1)**2.0 \
            / self.step_size

        # Construct proposal points for all walkers.
        Xj = X[:, walker_idx]
        Y = Xj + z * (X - Xj)

        ln_r = np.log(self.random_state.rand(num_walkers))
        for i in range(num_walkers):
            y = Y[:, [i]]
            lnprob_y[i] = -function.f(y)
            if (lnprob_y[i] == -np.inf) or (lnprob_x[i] == -np.inf):
                ln_q_i = -np.inf
            else:
                ln_q_i = (p - 1) * np.log(z[i]) \
                    + lnprob_y[i] - lnprob_x[i]

            if ln_r[i] <= ln_q_i:
                X[:, [i]] = y
                lnprob_x[i] = lnprob_y[i]
                accepted[i] += 1

            # Store the best sample visited by this walker.
            if lnprob_x[i] > lnprob_max[i]:
                lnprob_max[i] = lnprob_x[i]
                X_max[:, [i]] = X[:, [i]]

        if self.info_requested(Info.time):
            time.append(utils.time_cpu() - tm)

        if (it - 1) % self.thinning == 0:
            if self.info_requested(Info.iterates):
                for i in range(num_walkers):
                    iterates[i].append(X[:, [i]])
            if self.info_requested(Info.func_val):
                for i in range(num_walkers):
                    func_val[i].append(lnprob_x[i])

        if self.callback is not None:
            self.callback(locals())

    self.num_iter = it

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, it)
    if self.info_requested(Info.time):
        self.info_set(Info.time, time)
    if self.info_requested(Info.func_val):
        self.info_set(Info.func_val, func_val)
    if self.info_requested(Info.iterates):
        self.info_set(Info.iterates, iterates)
    if self.info_requested(Info.acceptance_rate):
        acceptance_rate = []
        for i in range(num_walkers):
            acceptance_rate.append(accepted[i] / float(it))
        self.info_set(Info.acceptance_rate, acceptance_rate)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return X_max
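
# Illustrative sketch (not part of the library) of the stretch move above,
# sampling a p-dimensional standard Gaussian with an ensemble of walkers.
# The scale z has density g(z) proportional to 1/sqrt(z) on [1/a, a], and
# the acceptance ratio carries the z^(p-1) volume factor, exactly as in
# the loop above. All constants are assumptions for the example only.
def _sketch_stretch_move(p=2, num_walkers=10, a=2.0, n_iter=2000):
    import numpy as np

    rng = np.random.RandomState(0)
    lnprob = lambda v: -0.5 * np.dot(v.T, v)[0, 0]  # Standard Gaussian.
    X = rng.randn(p, num_walkers)
    lnp = [lnprob(X[:, [i]]) for i in range(num_walkers)]

    for _ in range(n_iter):
        for i in range(num_walkers):
            j = rng.randint(num_walkers - 1)
            j = j if j < i else j + 1  # Any walker except walker i.
            z = ((a - 1.0) * rng.rand() + 1.0) ** 2.0 / a
            y = X[:, [j]] + z * (X[:, [i]] - X[:, [j]])
            ln_q = (p - 1) * np.log(z) + lnprob(y) - lnp[i]
            if np.log(rng.rand()) <= ln_q:
                X[:, [i]] = y
                lnp[i] = lnprob(y)
    return X  # Columns are (approximately) Gaussian draws.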
def run(self, function, x, **kwargs):
    """Return the sample with the highest probability over the
    distribution defined by the given negative log-likelihood function.

    Note: The procedure returns the sample that had the maximum value of
    exp(-function.f(x)), which corresponds to the sample that had the
    minimum value of function.f(x), i.e. the function represents the
    negative log-likelihood of the distribution.

    Parameters
    ----------
    function : parsimony.functions.properties.Function
        The negative log-likelihood function to minimise.

    x : numpy.ndarray
        The initial point.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)
    if self.info_requested(Info.time):
        time = []
    if self.info_requested(Info.func_val):
        func_val = []
    if self.info_requested(Info.iterates):
        iterates = []

    # Keep track of the best value (MAP approximation).
    lnprob_max = -np.inf
    x_max = x

    # For the acceptance rate.
    accepted = 0

    # Initial value from the start vector.
    lnprob_x = -function.f(x)

    for it in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        # Sample a proposal point.
        y = self.Q.random_sample(x)
        lnprob_y = -function.f(y)

        ln_q = lnprob_y - lnprob_x

        # Compensate for asymmetric transition probabilities.
        if not self.Q.is_symmetric():
            ln_q_y_x = self.Q.transition_lnprob(y, x)  # Move from x to y.
            ln_q_x_y = self.Q.transition_lnprob(x, y)  # Move from y to x.
            ln_q += ln_q_x_y - ln_q_y_x

        log_r = np.log(self.random_state.rand())
        if ln_q >= 0.0 or ln_q >= log_r:
            x = y
            lnprob_x = lnprob_y
            accepted += 1

        # Store the best sample visited.
        if lnprob_x > lnprob_max:
            lnprob_max = lnprob_x
            x_max = x

        if self.info_requested(Info.time):
            time.append(utils.time_cpu() - tm)

        if it % self.thinning == 0:
            if self.info_requested(Info.iterates):
                iterates.append(x)
            if self.info_requested(Info.func_val):
                func_val.append(lnprob_x)

        if self.callback is not None:
            self.callback(locals())

    self.num_iter = it

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, it)
    if self.info_requested(Info.time):
        self.info_set(Info.time, time)
    if self.info_requested(Info.func_val):
        self.info_set(Info.func_val, func_val)
    if self.info_requested(Info.iterates):
        self.info_set(Info.iterates, iterates)
    if self.info_requested(Info.acceptance_rate):
        self.info_set(Info.acceptance_rate, accepted / float(it))
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return x_max
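
# Illustrative sketch (not part of the library) of the accept/reject step
# above, with a symmetric Gaussian random-walk proposal, so the
# transition-probability correction cancels. The target is a standard
# normal through its negative log-likelihood; all constants are
# assumptions for the example only.
def _sketch_metropolis_hastings(n_iter=10000):
    import numpy as np

    rng = np.random.RandomState(0)
    neg_loglik = lambda v: 0.5 * v * v  # Standard normal, up to a constant.
    x, lnprob_x = 0.0, -neg_loglik(0.0)
    samples = []
    for _ in range(n_iter):
        y = x + 0.5 * rng.randn()  # Symmetric proposal.
        lnprob_y = -neg_loglik(y)
        if np.log(rng.rand()) <= lnprob_y - lnprob_x:  # MH acceptance.
            x, lnprob_x = y, lnprob_y
        samples.append(x)
    return samples  # Mean ~ 0, variance ~ 1 for long runs.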
def run(self, function, beta):
    """Find the minimiser of the given function, starting at beta.

    Parameters
    ----------
    function : Function
        The function to minimise.

    beta : Numpy array
        The start vector.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    z = betanew = betaold = beta

    if self.info_requested(Info.time):
        t_ = []
    if self.info_requested(Info.fvalue) \
            or self.info_requested(Info.func_val):
        f_ = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)
    if self.info_requested(Info.gap):
        gap_ = []

    if self.return_best:
        best_f = np.inf
        best_beta = None

    for i in range(1, max(self.min_iter, self.max_iter) + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        z = betanew + ((i - 2.0) / (i + 1.0)) * (betanew - betaold)

        step = function.step(z)

        betaold = betanew
        betanew = function.prox(z - step * function.grad(z),
                                step,
                                eps=1.0 / (float(i) ** (4.0 + consts.FLOAT_EPSILON)),
                                max_iter=self.max_iter)

        if self.info_requested(Info.time):
            t_.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue) \
                or self.info_requested(Info.func_val):
            func_val = function.f(betanew)
            f_.append(func_val)

            if self.return_best and func_val < best_f:
                best_f = func_val
                best_beta = betanew

        if self.callback is not None:
            self.callback(locals())

        if self.use_gap:

            gap = function.gap(betanew,
                               eps=self.eps, max_iter=self.max_iter)

            # TODO: Warn if gap < -consts.TOLERANCE.
            gap = abs(gap)  # May happen close to machine epsilon.
            if self.info_requested(Info.gap):
                gap_.append(gap)

            if not self.simulation:
                if self.info_requested(Info.verbose):
                    print("FISTA ite:%i, gap:%g" % (i, gap))
                if gap < self.eps:

                    if self.info_requested(Info.converged):
                        self.info_set(Info.converged, True)

                    break
        else:
            if not self.simulation:
                eps_cur = maths.norm(betanew - z)
                if self.info_requested(Info.verbose):
                    print("FISTA ite: %i, eps_cur:%g" % (i, eps_cur))

                if step > 0.0:
                    if (1.0 / step) * eps_cur < self.eps \
                            and i >= self.min_iter:

                        if self.info_requested(Info.converged):
                            self.info_set(Info.converged, True)

                        break

                else:  # TODO: Fix this!
                    if maths.norm(betanew - z) < self.eps \
                            and i >= self.min_iter:

                        if self.info_requested(Info.converged):
                            self.info_set(Info.converged, True)

                        break

    self.num_iter = i

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t_)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f_)
    if self.info_requested(Info.func_val):
        self.info_set(Info.func_val, f_)
    if self.info_requested(Info.gap):
        self.info_set(Info.gap, gap_)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    if self.return_best and best_beta is not None:
        return best_beta
    else:
        return betanew
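
# Illustrative sketch (not part of the library) of FISTA on the Lasso,
# mirroring the momentum term z = b_k + (k-2)/(k+1) * (b_k - b_{k-1}) and
# the (1/step)*||b_new - z|| stopping rule used above. Data and lam are
# random assumptions for the example only.
def _sketch_fista():
    import numpy as np

    rng = np.random.RandomState(0)
    X, y, lam = rng.randn(20, 5), rng.randn(20, 1), 0.5
    step = 1.0 / np.linalg.eigvalsh(X.T.dot(X)).max()

    b_new = b_old = np.zeros((5, 1))
    for i in range(1, 500):
        z = b_new + ((i - 2.0) / (i + 1.0)) * (b_new - b_old)
        b_old = b_new
        v = z - step * X.T.dot(X.dot(z) - y)  # Gradient step at z.
        b_new = np.sign(v) * np.maximum(np.abs(v) - step * lam, 0.0)
        if (1.0 / step) * np.linalg.norm(b_new - z) < 1e-6:
            break
    return b_new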
def run(self, function, beta):
    """Find the minimiser of the given function, starting at beta.

    Parameters
    ----------
    function : Function
        The function to minimise.

    beta : Numpy array
        The start vector.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    z = betanew = betaold = beta

    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    for i in range(1, max(self.min_iter, self.max_iter) + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        z = betanew + ((i - 2.0) / (i + 1.0)) * (betanew - betaold)

        step = function.step(z)

        betaold = betanew
        betanew = function.prox(z - step * function.grad(z),
                                step)

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue):
            f.append(function.f(betanew))

        if self.conesta_stop is not None:
            mu_min = self.conesta_stop[0]
            mu_old = function.set_mu(mu_min)
            stop_step = function.step(betanew)

            # Take one ISTA step for use in the stopping criterion.
            stop_z = function.prox(betanew - stop_step
                                   * function.grad(betanew), stop_step)
            function.set_mu(mu_old)

            if (1.0 / stop_step) * maths.norm(betanew - stop_z) < self.eps \
                    and i >= self.min_iter:

                if self.info_requested(Info.converged):
                    self.info_set(Info.converged, True)

                break

        else:
            if step > 0.0:
                if (1.0 / step) * maths.norm(betanew - z) < self.eps \
                        and i >= self.min_iter:

                    if self.info_requested(Info.converged):
                        self.info_set(Info.converged, True)

                    break

            else:  # TODO: Fix this!
                if maths.norm(betanew - z) < self.eps \
                        and i >= self.min_iter:

                    if self.info_requested(Info.converged):
                        self.info_set(Info.converged, True)

                    break

    self.num_iter = i

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return betanew
def run(self, X, y, beta=None):
    """Find the minimiser of the associated function, starting at beta.

    Parameters
    ----------
    X : Numpy array, shape n-by-p
        The matrix X with independent variables.

    y : Numpy array, shape n-by-1
        The response variable y.

    beta : Numpy array
        Optional starting point.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)
    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    n, p = X.shape

    if beta is None:
        beta = self.start_vector.get_vector(p)
    else:
        beta = beta.copy()

    function = functions.CombinedFunction()
    function.add_function(functions.losses.LinearRegression(X, y,
                                                            mean=False))
    function.add_prox(penalties.L1(l=self.l))

    xTx = np.sum(X ** 2.0, axis=0)
    if self.mean:
        xTx *= 1.0 / float(n)

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        # The update has an error that propagates. This resets the
        # approximation. We may not need to do this at every iteration.
        y_Xbeta = y - np.dot(X, beta)

        betaold = beta.copy()
        for j in range(p):

            xj = X[:, [j]]
            betaj = beta[j, 0]

            if xTx[j] < consts.TOLERANCE:  # Avoid division-by-zero.
                bj = 0.0
            else:
                bj = np.dot(xj.T, y_Xbeta + xj * betaj)[0, 0]
                if self.mean:
                    bj /= float(n)

                if j < self.penalty_start:
                    bj = bj / xTx[j]
                else:
                    # Soft thresholding.
                    bj = np.sign(bj) \
                        * max(0.0, (abs(bj) - self.l) / xTx[j])

            y_Xbeta -= xj * (bj - betaj)  # Update X.beta.
            beta[j] = bj  # Save result.

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue):
            f_ = self._f(y_Xbeta, y, beta)
            f.append(f_)

        if maths.norm(beta - betaold) < self.eps \
                and i >= self.min_iter:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            break

    self.num_iter = i

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return beta
def run(self, function, beta):
    """Find the minimiser of the given function, starting at beta.

    Parameters
    ----------
    function : Function
        The function to minimise.

    beta : numpy array
        The start vector.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    betanew = betaold = beta

    if self.use_gradient and hasattr(function, "grad"):
        function_grad = function.grad
    else:
        function_grad = function.subgrad

    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.func_val):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    fbest = np.inf
    betabest = None

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        betaold = betanew
        subgrad = function_grad(betaold)

        step = self.step_size(i, betaold, subgrad)

        betanew = betaold - step * subgrad

        fval = None
        if self.use_best_f:
            fval = function.f(betanew)
            if fval < fbest:
                fbest = fval
                betabest = betanew

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.func_val):
            if self.use_best_f:
                f.append(fbest)
            else:
                if fval is None:
                    f.append(function.f(betanew))
                else:
                    f.append(fval)

        if maths.norm(betanew - betaold) < self.eps \
                and i >= self.min_iter:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            break

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.func_val):
        self.info_set(Info.func_val, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    if self.use_best_f:
        return betabest
    else:
        return betanew
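
# Illustrative sketch (not part of the library) of subgradient descent on
# the nonsmooth f(b) = ||b - c||_1, with sign(b - c) as a subgradient and
# a diminishing 1/k step; like run() above, it tracks the best iterate,
# since subgradient steps do not monotonically decrease f. c is an
# assumption for the example only.
def _sketch_subgradient_descent():
    import numpy as np

    c = np.array([[1.0], [-2.0]])
    b = np.zeros((2, 1))
    f_best, b_best = np.inf, b
    for k in range(1, 5000):
        b = b - (1.0 / k) * np.sign(b - c)  # Diminishing step size.
        fval = np.abs(b - c).sum()
        if fval < f_best:  # Keep the best point visited.
            f_best, b_best = fval, b
    return b_best  # Approaches c.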
def run(self, function, beta):
    """Find the minimiser of the given function, starting at beta.

    Parameters
    ----------
    function : parsimony.functions.properties.Function
        The function to minimise.

    beta : numpy.ndarray or list of numpy.ndarray
        The start point.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    is_list = False
    if isinstance(beta, list):
        is_list = True

    betanew = betaold = beta

    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue) \
            or self.info_requested(Info.func_val):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        step = function.step(betanew, iteration=i)

        betaold = betanew
        grad = function.grad(betaold)

        if not is_list:
            betanew = betaold - step * grad
        else:
            betanew = [betaold[j] - step * grad[j]
                       for j in range(len(betaold))]

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue) \
                or self.info_requested(Info.func_val):
            f.append(function.f(betanew))

        if not is_list:
            err = maths.norm(betanew - betaold)
        else:
            err = np.sqrt(np.sum([np.sum((betanew[j] - betaold[j])**2.0)
                                  for j in range(len(betanew))]))

        if err < self.eps and i >= self.min_iter:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            break

    self.num_iter = i

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue) \
            or self.info_requested(Info.func_val):
        self.info_set(Info.fvalue, f)
        self.info_set(Info.func_val, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return betanew
def run(self, function, beta=None):
    """The excessive gap method for strongly convex functions.

    Parameters
    ----------
    function : Function
        The function to minimise. It contains two parts, function.g is
        the strongly convex part and function.h is the smoothed part of
        the function.

    beta : Numpy array
        A start vector. This is normally not given, but left None, since
        the start vector is computed by the algorithm.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)

    A = function.A()

    u = [0] * len(A)
    for i in range(len(A)):
        u[i] = np.zeros((A[i].shape[0], 1))

    # L = lambda_max(A'A) / (lambda_min(X'X) + k)
    L = function.L()
    if L < consts.TOLERANCE:
        L = consts.TOLERANCE

    mu = [2.0 * L]
    function.set_mu(mu[0])
    if beta is not None:
        beta0 = beta
    else:
        beta0 = function.betahat(u)  # u is zero here.
    beta = beta0
    alpha = function.V(u, beta, L)  # u is zero here.

    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue):
        f = []
    if self.info_requested(Info.bound):
        bound = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    k = 0
    while True:
        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        tau = 2.0 / (float(k) + 3.0)

        function.set_mu(mu[k])
        alpha_hat = function.alpha(beta)
        for i in range(len(alpha_hat)):
            u[i] = (1.0 - tau) * alpha[i] + tau * alpha_hat[i]

        mu.append((1.0 - tau) * mu[k])
        betahat = function.betahat(u)
        beta = (1.0 - tau) * beta + tau * betahat
        alpha = function.V(u, betahat, L)

        upper_limit = mu[k + 1] * function.M()

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue):
            mu_old = function.get_mu()
            function.set_mu(0.0)
            f.append(function.f(beta))
            function.set_mu(mu_old)
        if self.info_requested(Info.bound):
            # bound.append(2.0 * function.M() * mu[0]
            #              / ((float(k) + 1.0) * (float(k) + 2.0)))
            bound.append(upper_limit)

        if upper_limit < self.eps and k >= self.min_iter - 1:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            break

        if k >= self.max_iter - 1 and k >= self.min_iter - 1:
            break

        k = k + 1

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, k + 1)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f)
    if self.info_requested(Info.mu):
        self.info_set(Info.mu, mu)
    if self.info_requested(Info.bound):
        self.info_set(Info.bound, bound)
    if self.info_requested(Info.beta):
        self.info_set(Info.beta, beta0)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return beta
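
# Sketch (not part of the library) of the smoothing schedule above: with
# tau_k = 2/(k+3), the update mu_{k+1} = (1 - tau_k)*mu_k telescopes to
# mu_k = 2*mu_0 / ((k+1)(k+2)), which matches the commented-out bound in
# run() and gives the method its O(1/k^2) rate. mu_0 = 1 is an assumption
# for the example only.
def _sketch_excessive_gap_schedule(n=1000, mu0=1.0):
    mu = [mu0]
    for k in range(n):
        tau = 2.0 / (float(k) + 3.0)
        mu.append((1.0 - tau) * mu[-1])
    return mu  # mu[k] == 2*mu0 / ((k+1)*(k+2)), up to rounding.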