def cholesky(A, sparse=True, verbose=True):
    """
    Choose the best possible Cholesky factorizer.

    If possible, use the Scikit-Sparse sparse Cholesky method.
    Permutes the output L to ensure A = L.H . L

    Otherwise defaults to numpy's non-sparse version.

    Parameters
    ----------
    A : array-like
        array to decompose
    sparse : boolean, default: True
        whether to return a sparse array
    verbose : bool, default: True
        whether to print warnings
    """
    if SKSPIMPORT:
        A = sp.sparse.csc_matrix(A)
        try:
            F = spcholesky(A)

            # permutation matrix P
            P = sp.sparse.lil_matrix(A.shape)
            p = F.P()
            P[np.arange(len(p)), p] = 1

            # permute
            L = F.L()
            L = P.T.dot(L)
        except CholmodNotPositiveDefiniteError:
            raise NotPositiveDefiniteError('Matrix is not positive definite')

        if sparse:
            return L.T  # upper triangular factorization
        return L.T.A  # upper triangular factorization

    else:
        msg = 'Could not import Scikit-Sparse or Suite-Sparse.\n'\
              'This will slow down optimization for models with '\
              'monotonicity/convexity penalties and many splines.\n'\
              'See installation instructions for installing '\
              'Scikit-Sparse and Suite-Sparse via Conda.'
        if verbose:
            warnings.warn(msg)

        if sp.sparse.issparse(A):
            A = A.A

        try:
            L = sp.linalg.cholesky(A, lower=False)
        except LinAlgError:
            raise NotPositiveDefiniteError('Matrix is not positive definite')

        if sparse:
            return sp.sparse.csc_matrix(L)
        return L
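
# A quick sanity check of the contract stated in the docstring (A = L.H . L
# for the returned upper-triangular factor). This sketch exercises only the
# dense SciPy fallback path, so it assumes nothing about Scikit-Sparse being
# installed; the helper name below is hypothetical.
def _check_dense_cholesky_contract():
    import numpy as np
    from scipy import linalg

    rng = np.random.default_rng(0)
    B = rng.standard_normal((5, 5))
    A = B @ B.T + 5 * np.eye(5)  # symmetric positive definite by construction

    U = linalg.cholesky(A, lower=False)  # upper-triangular factor
    assert np.allclose(U.T @ U, A)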
def cholesky(A, sparse=True):
    """
    Choose the best possible Cholesky factorizer.

    If possible, use the Scikit-Sparse sparse Cholesky method.
    Permutes the output L to ensure A = L . L.H

    Otherwise defaults to numpy's non-sparse version.

    Parameters
    ----------
    A : array-like
        array to decompose
    sparse : boolean, default: True
        whether to return a sparse array
    """
    if SKSPIMPORT:
        A = sp.sparse.csc_matrix(A)
        try:
            # the factorization itself is what raises on non-PD input,
            # so it must sit inside the try block
            F = spcholesky(A)

            # permutation matrix P
            P = sp.sparse.lil_matrix(A.shape)
            p = F.P()
            P[np.arange(len(p)), p] = 1

            # permute
            L = F.L()
            L = P.T.dot(L)
        except CholmodNotPositiveDefiniteError:
            raise NotPositiveDefiniteError('Matrix is not positive definite')

        if sparse:
            return L
        return L.todense()

    else:
        msg = 'Could not import Scikit-Sparse or Suite-Sparse.\n'\
              'This will slow down optimization for models with '\
              'monotonicity/convexity penalties and many splines.\n'\
              'See installation instructions for installing '\
              'Scikit-Sparse and Suite-Sparse via Conda.'
        warnings.warn(msg)

        if sp.sparse.issparse(A):
            A = A.todense()

        try:
            L = np.linalg.cholesky(A)
        except LinAlgError:
            raise NotPositiveDefiniteError('Matrix is not positive definite')

        if sparse:
            return sp.sparse.csc_matrix(L)
        return L
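
# The permutation step above is the subtle part: CHOLMOD factors a
# fill-reducing reordering of A, not A itself. Assuming scikit-sparse is
# installed, this sketch (helper name hypothetical) checks both identities
# the code relies on.
def _check_cholmod_permutation():
    import numpy as np
    import scipy.sparse as sps
    from sksparse.cholmod import cholesky as spcholesky

    # random sparse symmetric positive definite matrix
    M = sps.random(40, 40, density=0.1, random_state=0)
    A = (M @ M.T + 40 * sps.eye(40)).tocsc()

    F = spcholesky(A)
    L, p = F.L(), F.P()

    # CHOLMOD factors the *permuted* matrix: A[p][:, p] = L @ L.H
    assert np.allclose((L @ L.T).toarray(), A.toarray()[np.ix_(p, p)])

    # undoing the permutation, as cholesky() does, yields a factor of A
    # itself (triangular only up to a row permutation)
    P = sps.lil_matrix(A.shape)
    P[np.arange(len(p)), p] = 1
    Lp = P.T @ L
    assert np.allclose((Lp @ Lp.T).toarray(), A.toarray())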
def fit(self, X, y, w=None):
    """
    Run Forest!

    Args:
        X (m x n): dataset
        y (m,): labels (real); reshaped internally to a column vector

    Returns:
        w (n,): estimated parameters
        t (float): elapsed time
    """
    assert X.shape[0] == y.shape[0]
    y = y.reshape(len(y), 1)  # column vector

    # initialization
    if w is None:
        w = np.zeros(X.shape[1])
    z_1 = np.zeros((X.shape[1], 1))
    z_2 = np.zeros((X.shape[1], 1))
    u_1 = np.zeros((X.shape[1], 1))  # scaled dual variable for w - z_1
    u_2 = np.zeros((X.shape[1], 1))  # scaled dual variable for w - z_2

    # ADMM loop
    keep_going = True
    n_iter = 0
    hist = list()
    prim_res = list()
    start = time.time()

    # cache factorization and other pre-computable terms
    if self.cache:
        if self.sparse:
            temp = scipy.sparse.csc_matrix(np.dot(X.T, X))
            L = spcholesky(temp, beta=self.rho)  # factors X.T X + rho*I
        else:
            L = cholesky(np.dot(X.T, X) + self.rho * np.eye(X.shape[1]),
                         lower=True)
        q = np.dot(X.T, y)
        q.shape = (q.size, 1)

    while keep_going and (n_iter <= self.max_iter):
        n_iter += 1

        # w-update step
        if self.cache:
            g = 0.5 * self.rho * (z_1 + z_2 - u_1 - u_2) + q
            # back-solving against the cached factorization
            if self.sparse:
                w = L.solve_A(g)  # CHOLMOD solves (X.T X + rho*I) w = g
            else:
                w = scipy.linalg.solve(L.T, scipy.linalg.solve(L, g))
        else:
            def fun(var):
                var = var.reshape(-1, 1)  # minimize passes a flat vector
                # augmented-Lagrangian quadratic terms
                temp_1 = (0.5 * self.rho) * np.linalg.norm(var - z_1 + u_1)**2
                temp_2 = (0.5 * self.rho) * np.linalg.norm(var - z_2 + u_2)**2
                return f(X, y, var) + temp_1 + temp_2

            opt_resul = minimize(fun, np.ravel(w), method='bfgs')
            w = opt_resul.x
            w.shape = (w.shape[0], 1)

        # z_i update step - these sub-steps can be done in parallel
        z_1_old = z_1.copy()
        z_2_old = z_2.copy()

        x1_hat = self.alpha * w + (1 - self.alpha) * z_1_old  # over-relaxation
        z_1 = prxopt.shrinkage(x1_hat + u_1, self.lamb / self.rho)

        x2_hat = self.alpha * w + (1 - self.alpha) * z_2_old
        z_2 = prxopt.proximal_constrained(x2_hat + u_2)  # projection onto C

        # dual variables update step
        u_1 = u_1 + (x1_hat - z_1)  # dual variable for the 1st constraint
        u_2 = u_2 + (x2_hat - z_2)  # dual variable for the 2nd constraint

        # monitor costs over time
        if PLOT_COST:
            hist.append(((np.dot(X, w) - y) ** 2).sum()
                        + self.lamb * abs(w).sum())

        # stopping criterion
        primal_res = 0.5 * ((w - z_1) + (w - z_2))
        primal_res_norm = np.linalg.norm(primal_res)

        # monitor primal residual over time
        prim_res.append(primal_res_norm)

        dual_res = 0.5 * ((z_1 - z_1_old) + (z_2 - z_2_old))
        dual_res_norm = np.linalg.norm(self.rho * dual_res)

        eps_pri = np.sqrt(X.shape[1]) * ABS_TOL + REL_TOL * \
            max(np.linalg.norm(w),
                0.5 * (np.linalg.norm(-z_1) + np.linalg.norm(-z_2)))
        eps_dual = np.sqrt(X.shape[1]) * ABS_TOL + REL_TOL * 0.5 * \
            (np.linalg.norm(u_1) + np.linalg.norm(u_2))

        # adaptive rho, based on Boyd et al., 2011, ADMM, p. 20
        if self.vary:
            if primal_res_norm > self.mu * dual_res_norm:
                self.rho *= self.tau_incr
                u_1 = u_1 / self.tau_incr
                u_2 = u_2 / self.tau_incr
            elif dual_res_norm > self.mu * primal_res_norm:
                self.rho *= (1 / self.tau_decr)
                u_1 = u_1 * self.tau_decr
                u_2 = u_2 * self.tau_decr

        # print convergence information?
        if VERBOSE:
            message = ('|ADMM it.{} |r_norm: {:8.3f} |eps_pri: {:1.3f} '
                       '|s_norm: {:8.3f} |eps_dual: {:1.3f} |obj: {:8.3f}')
            value = (0.5 * np.linalg.norm(np.dot(X, w) - y) ** 2
                     + self.lamb * np.sum(abs(w)))
            print(message.format(n_iter, primal_res_norm, eps_pri,
                                 dual_res_norm, eps_dual, value))

        if primal_res_norm <= eps_pri and dual_res_norm <= eps_dual:
            keep_going = False
            if VERBOSE:
                print('Primal-dual stopping criterion met.')

    self.hist = hist

    # make sure it's feasible (w_i >= 0, i = 1, ..., d): verify whether
    # constraint satisfiability is required when doing an early stop (few
    # max_iters). If not, it may be better to leave this commented out, as
    # the projection might be too harsh on early-stage solutions.
    # w_final = prxopt.proximal_constrained(w)
    w_final = z_2.copy()

    if PLOT_COST:
        # cost function
        plt.subplot(2, 1, 1)
        plt.plot(hist)
        plt.xlabel('iterations')
        plt.ylabel('Cost function')

        # primal residual: average of the two residuals (one per constraint)
        plt.subplot(2, 1, 2)
        plt.semilogy(prim_res)
        plt.xlabel('iterations')
        plt.ylabel('Primal residual - Avg')
        plt.show(block=False)

    return w_final.flatten(), time.time() - start
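
# The prxopt module is not shown here. Judging from how fit() calls it,
# shrinkage is presumably elementwise soft-thresholding (the proximal
# operator of the l1 penalty) and proximal_constrained the Euclidean
# projection onto the nonnegative orthant C = {w : w >= 0} referenced in
# the feasibility comment. A minimal sketch under those assumptions; both
# implementations below are stand-ins, not the actual prxopt code.
import numpy as np


def shrinkage(x, kappa):
    # soft-thresholding: prox of kappa * ||.||_1, applied elementwise
    return np.sign(x) * np.maximum(np.abs(x) - kappa, 0.0)


def proximal_constrained(x):
    # projection onto the nonnegative orthant: clip negatives to zero
    return np.maximum(x, 0.0)

# Under this reading, returning z_2 as w_final makes sense: z_2 is the
# iterate that has just been projected onto C, so it is always feasible,
# whereas w itself may still violate the constraints at early stopping.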