def try_factorization_and_reallocation(kkt, linear_solver, reallocation_factor, max_iter, timer=None):
    """
    Factorize ``kkt``, enlarging the linear solver's memory allocation and
    retrying whenever an attempt reports ``not_enough_memory``.

    Each attempt performs a symbolic factorization and, only if that reports
    ``LinearSolverStatus.successful``, a numeric factorization. When the
    attempt ends with ``LinearSolverStatus.not_enough_memory``,
    ``linear_solver.increase_memory_allocation(reallocation_factor)`` is
    called and the next attempt starts. Any other status ends the loop.

    Parameters
    ----------
    kkt:
        The matrix handed to the linear solver's factorization methods.
    linear_solver:
        Object providing ``do_symbolic_factorization``,
        ``do_numeric_factorization`` and ``increase_memory_allocation``.
    reallocation_factor:
        Forwarded unchanged to ``increase_memory_allocation``.
    max_iter: int
        Maximum number of factorization attempts; must be at least 1.
    timer: HierarchicalTimer
        Timer used for profiling. A new one is created if None.

    Returns
    -------
    status:
        The status reported by the last factorization attempt.
    count: int
        Zero-based index of the last attempt.

    Raises
    ------
    ValueError
        If ``max_iter`` is smaller than 1.
    """
    if max_iter < 1:
        # Raised explicitly instead of asserted: assertions are stripped
        # under ``python -O``, after which ``status`` and ``count`` would be
        # unbound at the return statement below.
        raise ValueError('max_iter must be at least 1; got %s' % (max_iter,))
    if timer is None:
        timer = HierarchicalTimer()

    for count in range(max_iter):
        # Performance could be improved significantly by only performing
        # symbolic factorization once. However, we first have to make sure
        # the nonzero structure (and ordering of row and column arrays) of
        # the KKT matrix never changes. We have not had time to test this
        # thoroughly, yet.
        timer.start('symbolic')
        res = linear_solver.do_symbolic_factorization(matrix=kkt, raise_on_error=False)
        timer.stop('symbolic')

        if res.status == LinearSolverStatus.successful:
            timer.start('numeric')
            res = linear_solver.do_numeric_factorization(matrix=kkt, raise_on_error=False)
            timer.stop('numeric')

        status = res.status
        if status != LinearSolverStatus.not_enough_memory:
            break
        linear_solver.increase_memory_allocation(reallocation_factor)

    return status, count
def do_back_solve(self, rhs, timer=None):
    """
    Solve the factorized block system for a given right hand side.

    Should only be called after do_numeric_factorization.

    The solve proceeds in three stages: the right hand side of the
    Schur-complement system is assembled from the locally owned blocks and
    combined across ranks with ``comm.Allreduce``; the Schur-complement
    solver produces the coupling block; finally each locally owned block is
    solved with the coupling contribution removed from its right hand side.

    Parameters
    ----------
    rhs: MPIBlockVector
    timer: pyutilib.misc.timing.HierarchicalTimer

    Returns
    -------
    result: MPIBlockVector
    """
    if timer is None:
        timer = HierarchicalTimer()
    timer.start('back_solve')

    # Index of the last block row/column, which holds the coupling part.
    last = self.block_dim - 1

    # Stage 1: this rank's share of the Schur-complement right hand side.
    local_sc_rhs = np.zeros(rhs.get_block(last).size, dtype='d')
    for ndx in self.local_block_indices:
        border = self.block_matrix.get_block(last, ndx)
        block_solution = self.subproblem_solvers[ndx].do_back_solve(
            rhs.get_block(ndx))
        local_sc_rhs -= border.tocsr().dot(block_solution.flatten())

    # Combine the per-rank pieces (NOTE(review): presumably a sum over an
    # MPI communicator -- confirm against the definition of ``comm``).
    reduced = np.zeros(rhs.get_block(last).shape[0], dtype='d')
    comm.Allreduce(local_sc_rhs, reduced)
    sc_rhs = rhs.get_block(last) + reduced

    # Stage 2: solve for the coupling block.
    result = rhs.copy_structure()
    coupling = self.schur_complement_solver.do_back_solve(sc_rhs)

    # Stage 3: solve each local block with the coupling term moved to the
    # right hand side.
    for ndx in self.local_block_indices:
        border = self.block_matrix.get_block(last, ndx)
        block_rhs = rhs.get_block(ndx) - border.tocsr().transpose().dot(
            coupling.flatten())
        result.set_block(
            ndx, self.subproblem_solvers[ndx].do_back_solve(block_rhs))

    result.set_block(last, coupling)

    timer.stop('back_solve')
    return result
def check_convergence(self, barrier, timer=None):
    """
    Compute the infinity norms of the primal, dual and complementarity
    residuals at the iterate currently stored in ``self.interface``.

    Parameters
    ----------
    barrier: float
        Value subtracted from every bound complementarity product. Entries
        that belong to an infinite bound are set to zero afterwards.
    timer: HierarchicalTimer
        Timer used for profiling. A new one is created if None.

    Returns
    -------
    primal_inf: float
        Largest absolute equality / inequality constraint residual.
    dual_inf: float
        Largest absolute entry of the gradient of the Lagrangian with
        respect to the primals and the slacks.
    complimentarity_inf: float
        Largest absolute bound complementarity residual.
    """
    if timer is None:
        timer = HierarchicalTimer()

    def _largest_magnitude(vec):
        # np.max cannot reduce an empty array, so an empty vector reports 0.
        if vec.size == 0:
            return 0
        return np.max(np.abs(vec))

    interface = self.interface
    slacks = interface.get_slacks()

    # --- function evaluations ------------------------------------------
    timer.start('grad obj')
    grad_obj = interface.get_obj_factor() * interface.evaluate_grad_objective()
    timer.stop('grad obj')

    timer.start('jac eq')
    jac_eq = interface.evaluate_jacobian_eq()
    timer.stop('jac eq')

    timer.start('jac ineq')
    jac_ineq = interface.evaluate_jacobian_ineq()
    timer.stop('jac ineq')

    timer.start('eq cons')
    eq_resid = interface.evaluate_eq_constraints()
    timer.stop('eq cons')

    timer.start('ineq cons')
    ineq_resid = interface.evaluate_ineq_constraints() - slacks
    timer.stop('ineq cons')

    # --- current iterate -------------------------------------------------
    primals = interface.get_primals()
    duals_eq = interface.get_duals_eq()
    duals_ineq = interface.get_duals_ineq()
    duals_primals_lb = interface.get_duals_primals_lb()
    duals_primals_ub = interface.get_duals_primals_ub()
    duals_slacks_lb = interface.get_duals_slacks_lb()
    duals_slacks_ub = interface.get_duals_slacks_ub()

    # --- bounds; infinite entries are replaced by 0 in the working copies
    # (those entries are overwritten with 0 in the residuals below) --------
    primals_lb = interface.primals_lb()
    primals_ub = interface.primals_ub()
    primals_lb_absent = np.isneginf(primals_lb)
    primals_ub_absent = np.isinf(primals_ub)
    primals_lb_mod = primals_lb.copy()
    primals_ub_mod = primals_ub.copy()
    primals_lb_mod[primals_lb_absent] = 0
    primals_ub_mod[primals_ub_absent] = 0

    ineq_lb = interface.ineq_lb()
    ineq_ub = interface.ineq_ub()
    ineq_lb_absent = np.isneginf(ineq_lb)
    ineq_ub_absent = np.isinf(ineq_ub)
    ineq_lb_mod = ineq_lb.copy()
    ineq_ub_mod = ineq_ub.copy()
    ineq_lb_mod[ineq_lb_absent] = 0
    ineq_ub_mod[ineq_ub_absent] = 0

    # --- gradient of the Lagrangian ---------------------------------------
    timer.start('grad_lag_primals')
    grad_lag_primals = grad_obj + jac_eq.transpose() * duals_eq
    grad_lag_primals += jac_ineq.transpose() * duals_ineq
    grad_lag_primals -= duals_primals_lb
    grad_lag_primals += duals_primals_ub
    timer.stop('grad_lag_primals')

    timer.start('grad_lag_slacks')
    grad_lag_slacks = (-duals_ineq -
                       duals_slacks_lb +
                       duals_slacks_ub)
    timer.stop('grad_lag_slacks')

    # --- bound complementarity residuals ----------------------------------
    timer.start('bound resids')
    primals_lb_resid = (primals - primals_lb_mod) * duals_primals_lb - barrier
    primals_ub_resid = (primals_ub_mod - primals) * duals_primals_ub - barrier
    primals_lb_resid[primals_lb_absent] = 0
    primals_ub_resid[primals_ub_absent] = 0
    slacks_lb_resid = (slacks - ineq_lb_mod) * duals_slacks_lb - barrier
    slacks_ub_resid = (ineq_ub_mod - slacks) * duals_slacks_ub - barrier
    slacks_lb_resid[ineq_lb_absent] = 0
    slacks_ub_resid[ineq_ub_absent] = 0
    timer.stop('bound resids')

    # --- infinity norms ----------------------------------------------------
    primal_inf = max(_largest_magnitude(eq_resid),
                     _largest_magnitude(ineq_resid))

    # The primal gradient norm is taken directly, without the empty guard.
    max_grad_lag_primals = np.max(np.abs(grad_lag_primals))
    dual_inf = max(max_grad_lag_primals,
                   _largest_magnitude(grad_lag_slacks))

    complimentarity_inf = max(_largest_magnitude(primals_lb_resid),
                              _largest_magnitude(primals_ub_resid),
                              _largest_magnitude(slacks_lb_resid),
                              _largest_magnitude(slacks_ub_resid))

    return primal_inf, dual_inf, complimentarity_inf
def solve(self, interface, timer=None, report_timing=False):
    """
    Run the interior point iteration on the problem exposed by
    ``interface``.

    Parameters
    ----------
    interface: pyomo.contrib.interior_point.interface.BaseInteriorPointInterface
        The interior point interface. This object handles the function
        evaluation, building the KKT matrix, and building the KKT right
        hand side.
    timer: HierarchicalTimer
        Timer used for profiling. A new one is created if None.
    report_timing: bool
        If True, the timer is printed before returning.

    Returns
    -------
    status: InteriorPointStatus
        ``InteriorPointStatus.optimal`` if, within ``self.max_iter``
        iterations, the largest of the three infeasibilities returned by
        ``check_convergence(barrier=0)`` is at most ``self.tol``;
        ``InteriorPointStatus.error`` otherwise.
    """
    if timer is None:
        timer = HierarchicalTimer()
    linear_solver = self.linear_solver
    max_iter = self.max_iter
    tol = self.tol

    timer.start('IP solve')
    timer.start('init')

    self._barrier_parameter = 0.1
    self.set_interface(interface)

    t0 = time.time()

    # Work on copies of the initial guesses; they are updated in place
    # at the end of every iteration.
    primals = interface.init_primals().copy()
    slacks = interface.init_slacks().copy()
    duals_eq = interface.init_duals_eq().copy()
    duals_ineq = interface.init_duals_ineq().copy()
    duals_primals_lb = interface.init_duals_primals_lb().copy()
    duals_primals_ub = interface.init_duals_primals_ub().copy()
    duals_slacks_lb = interface.init_duals_slacks_lb().copy()
    duals_slacks_ub = interface.init_duals_slacks_ub().copy()

    self.process_init(primals,
                      interface.primals_lb(),
                      interface.primals_ub())
    self.process_init(slacks,
                      interface.ineq_lb(),
                      interface.ineq_ub())
    self.process_init_duals_lb(duals_primals_lb,
                               self.interface.primals_lb())
    self.process_init_duals_ub(duals_primals_ub,
                               self.interface.primals_ub())
    self.process_init_duals_lb(duals_slacks_lb,
                               self.interface.ineq_lb())
    self.process_init_duals_ub(duals_slacks_ub,
                               self.interface.ineq_ub())

    interface.set_barrier_parameter(self._barrier_parameter)

    # Values reported in the first log row, before any step is taken.
    alpha_primal_max = 1
    alpha_dual_max = 1

    header_template = ('{_iter:<6}'
                       '{objective:<11}'
                       '{primal_inf:<11}'
                       '{dual_inf:<11}'
                       '{compl_inf:<11}'
                       '{barrier:<11}'
                       '{alpha_p:<11}'
                       '{alpha_d:<11}'
                       '{reg:<11}'
                       '{time:<7}')
    self.logger.info(header_template.format(_iter='Iter',
                                            objective='Objective',
                                            primal_inf='Prim Inf',
                                            dual_inf='Dual Inf',
                                            compl_inf='Comp Inf',
                                            barrier='Barrier',
                                            alpha_p='Prim Step',
                                            alpha_d='Dual Step',
                                            reg='Reg',
                                            time='Time'))

    row_template = ('{_iter:<6}'
                    '{objective:<11.2e}'
                    '{primal_inf:<11.2e}'
                    '{dual_inf:<11.2e}'
                    '{compl_inf:<11.2e}'
                    '{barrier:<11.2e}'
                    '{alpha_p:<11.2e}'
                    '{alpha_d:<11.2e}'
                    '{reg:<11.2e}'
                    '{time:<7.3f}')

    reg_coef = 0

    timer.stop('init')

    # Reported unless the tolerance check below succeeds.
    status = InteriorPointStatus.error

    for iteration in range(max_iter):
        self._iter = iteration

        # Load the current iterate into the interface.
        interface.set_primals(primals)
        interface.set_slacks(slacks)
        interface.set_duals_eq(duals_eq)
        interface.set_duals_ineq(duals_ineq)
        interface.set_duals_primals_lb(duals_primals_lb)
        interface.set_duals_primals_ub(duals_primals_ub)
        interface.set_duals_slacks_lb(duals_slacks_lb)
        interface.set_duals_slacks_ub(duals_slacks_ub)

        # Convergence of the original problem (barrier term switched off).
        timer.start('convergence check')
        primal_inf, dual_inf, complimentarity_inf = self.check_convergence(
            barrier=0, timer=timer)
        timer.stop('convergence check')

        objective = interface.evaluate_objective()
        self.logger.info(row_template.format(
            _iter=iteration,
            objective=objective,
            primal_inf=primal_inf,
            dual_inf=dual_inf,
            compl_inf=complimentarity_inf,
            barrier=self._barrier_parameter,
            alpha_p=alpha_primal_max,
            alpha_d=alpha_dual_max,
            reg=reg_coef,
            time=time.time() - t0))

        if max(primal_inf, dual_inf, complimentarity_inf) <= tol:
            status = InteriorPointStatus.optimal
            break

        # Convergence of the current barrier subproblem.
        timer.start('convergence check')
        primal_inf, dual_inf, complimentarity_inf = self.check_convergence(
            barrier=self._barrier_parameter, timer=timer)
        timer.stop('convergence check')

        barrier_problem_inf = max(primal_inf, dual_inf, complimentarity_inf)
        if barrier_problem_inf <= 0.1 * self._barrier_parameter:
            # This comparison is made with barrier problem infeasibility.
            # Sometimes have trouble getting dual infeasibility low enough
            self.update_barrier_parameter()

        interface.set_barrier_parameter(self._barrier_parameter)

        # Build the KKT system at the current iterate.
        timer.start('eval')
        timer.start('eval kkt')
        kkt = interface.evaluate_primal_dual_kkt_matrix(timer=timer)
        timer.stop('eval kkt')
        timer.start('eval rhs')
        rhs = interface.evaluate_primal_dual_kkt_rhs(timer=timer)
        timer.stop('eval rhs')
        timer.stop('eval')

        # Factorize linear system
        timer.start('factorize')
        reg_coef = self.factorize(kkt=kkt, timer=timer)
        timer.stop('factorize')

        timer.start('back solve')
        with self.linear_solve_context:
            self.logger.info('Iter: %s' % self._iter)
            delta = linear_solver.do_back_solve(rhs)
        timer.stop('back solve')

        interface.set_primal_dual_kkt_solution(delta)

        timer.start('frac boundary')
        alpha_primal_max, alpha_dual_max = self.fraction_to_the_boundary()
        timer.stop('frac boundary')

        # Fetch every search direction before any vector is modified.
        delta_primals = interface.get_delta_primals()
        delta_slacks = interface.get_delta_slacks()
        delta_duals_eq = interface.get_delta_duals_eq()
        delta_duals_ineq = interface.get_delta_duals_ineq()
        delta_duals_primals_lb = interface.get_delta_duals_primals_lb()
        delta_duals_primals_ub = interface.get_delta_duals_primals_ub()
        delta_duals_slacks_lb = interface.get_delta_duals_slacks_lb()
        delta_duals_slacks_ub = interface.get_delta_duals_slacks_ub()

        # Primal quantities move with the primal step length ...
        primals += alpha_primal_max * delta_primals
        slacks += alpha_primal_max * delta_slacks
        # ... and all multipliers with the dual step length.
        duals_eq += alpha_dual_max * delta_duals_eq
        duals_ineq += alpha_dual_max * delta_duals_ineq
        duals_primals_lb += alpha_dual_max * delta_duals_primals_lb
        duals_primals_ub += alpha_dual_max * delta_duals_primals_ub
        duals_slacks_lb += alpha_dual_max * delta_duals_slacks_lb
        duals_slacks_ub += alpha_dual_max * delta_duals_slacks_ub

    timer.stop('IP solve')
    if report_timing:
        print(timer)
    return status
def evaluate_primal_dual_kkt_rhs(self, timer=None):
    """
    Assemble the right hand side of the primal-dual KKT system.

    The returned vector is the negative of a 4-block vector holding, in
    order: the gradient of the barrier Lagrangian with respect to the
    primals, the same gradient with respect to the slacks, the equality
    constraint values, and the inequality constraint values minus the
    slacks.

    Parameters
    ----------
    timer: HierarchicalTimer
        Timer used for profiling. A new one is created if None.

    Returns
    -------
    rhs: BlockVector
    """
    if timer is None:
        timer = HierarchicalTimer()
    nlp = self._nlp

    timer.start('eval grad obj')
    grad_obj = self.get_obj_factor() * self.evaluate_grad_objective()
    timer.stop('eval grad obj')

    timer.start('eval jac')
    jac_eq = nlp.evaluate_jacobian_eq()
    jac_ineq = nlp.evaluate_jacobian_ineq()
    timer.stop('eval jac')

    timer.start('eval cons')
    eq_resid = nlp.evaluate_eq_constraints()
    ineq_resid = nlp.evaluate_ineq_constraints() - self._slacks
    timer.stop('eval cons')

    timer.start('grad_lag_primals')
    eq_term = jac_eq.transpose() * nlp.get_duals_eq()
    ineq_term = jac_ineq.transpose() * nlp.get_duals_ineq()
    # Barrier contributions of the primal lower and upper bounds.
    primals_lb_term = self._barrier / (nlp.get_primals() - nlp.primals_lb())
    primals_ub_term = self._barrier / (nlp.primals_ub() - nlp.get_primals())
    grad_lag_primals = (grad_obj + eq_term + ineq_term
                        - primals_lb_term + primals_ub_term)
    timer.stop('grad_lag_primals')

    timer.start('grad_lag_slacks')
    # Barrier contributions of the slack lower and upper bounds.
    slacks_lb_term = self._barrier / (self._slacks - nlp.ineq_lb())
    slacks_ub_term = self._barrier / (nlp.ineq_ub() - self._slacks)
    grad_lag_slacks = -nlp.get_duals_ineq() - slacks_lb_term + slacks_ub_term
    timer.stop('grad_lag_slacks')

    residual = BlockVector(4)
    for position, block in enumerate(
            (grad_lag_primals, grad_lag_slacks, eq_resid, ineq_resid)):
        residual.set_block(position, block)

    return -residual
def evaluate_primal_dual_kkt_matrix(self, timer=None):
    """
    Assemble the 4x4 block primal-dual KKT matrix.

    Block layout (row, column):

    * (0, 0): Hessian of the Lagrangian with the primal bound terms
      appended as extra diagonal entries
    * (1, 1): diagonal slack bound terms
    * (2, 0) / (0, 2): equality Jacobian and its transpose
    * (3, 0) / (0, 3): inequality Jacobian and its transpose
    * (3, 1) / (1, 3): negative identity
    * (2, 2): identity-patterned block whose stored values are all zero

    Parameters
    ----------
    timer: HierarchicalTimer
        Timer used for profiling. A new one is created if None.

    Returns
    -------
    kkt: BlockMatrix
    """
    if timer is None:
        timer = HierarchicalTimer()
    nlp = self._nlp

    timer.start('eval hess')
    hess_block = nlp.evaluate_hessian_lag()
    timer.stop('eval hess')

    timer.start('eval jac')
    jac_eq = nlp.evaluate_jacobian_eq()
    jac_ineq = nlp.evaluate_jacobian_ineq()
    timer.stop('eval jac')

    duals_primals_lb = self._duals_primals_lb
    duals_primals_ub = self._duals_primals_ub
    duals_slacks_lb = self._duals_slacks_lb
    duals_slacks_ub = self._duals_slacks_ub
    primals = nlp.get_primals()

    timer.start('hess block')
    primal_diag = (duals_primals_lb/(primals - nlp.primals_lb()) +
                   duals_primals_ub/(nlp.primals_ub() - primals))
    n_primals = nlp.n_primals()
    diag_positions = np.arange(n_primals)
    # The diagonal terms are appended as additional coordinate entries
    # rather than added to existing values.
    hess_block.row = np.concatenate([hess_block.row, diag_positions])
    hess_block.col = np.concatenate([hess_block.col, diag_positions])
    hess_block.data = np.concatenate([hess_block.data, primal_diag])
    timer.stop('hess block')

    timer.start('slack block')
    slack_diag = (duals_slacks_lb/(self._slacks - nlp.ineq_lb()) +
                  duals_slacks_ub/(nlp.ineq_ub() - self._slacks))
    n_ineq = nlp.n_ineq_constraints()
    diag_positions = np.arange(n_ineq)
    slack_block = scipy.sparse.coo_matrix(
        (slack_diag, (diag_positions, diag_positions)),
        shape=(n_ineq, n_ineq))
    timer.stop('slack block')

    timer.start('regularization block')
    # Identity sparsity pattern with every stored value overwritten by 0.
    eq_reg_blk = scipy.sparse.identity(nlp.n_eq_constraints(), format='coo')
    eq_reg_blk.data.fill(0)
    timer.stop('regularization block')

    timer.start('set block')
    kkt = BlockMatrix(4, 4)
    kkt.set_block(0, 0, hess_block)
    kkt.set_block(1, 1, slack_block)
    kkt.set_block(2, 0, jac_eq)
    kkt.set_block(0, 2, jac_eq.transpose())
    kkt.set_block(3, 0, jac_ineq)
    kkt.set_block(0, 3, jac_ineq.transpose())
    kkt.set_block(
        3, 1,
        -scipy.sparse.identity(nlp.n_ineq_constraints(), format='coo'))
    kkt.set_block(
        1, 3,
        -scipy.sparse.identity(nlp.n_ineq_constraints(), format='coo'))
    kkt.set_block(2, 2, eq_reg_blk)
    timer.stop('set block')

    return kkt
def test_hierarchical_timer(self):
    """
    Exercise nested start/stop calls on a HierarchicalTimer and check the
    recorded call counts, the percentage relations between nested timers,
    and the error raised when stopping a timer that is not the innermost
    active one.
    """
    timer = HierarchicalTimer()

    timer.start('all')
    for _ in range(10):
        timer.start('a')
        for _ in range(5):
            timer.start('aa')
            timer.stop('aa')
        timer.start('ab')
        timer.stop('ab')
        timer.stop('a')
        timer.start('b')
        timer.stop('b')

    # One more 'a' (11 calls in total); while it is active, 'all' cannot
    # be stopped.
    timer.start('a')
    expected_message = 'all is not the currently active timer. The only timer that can currently be stopped is all.a'
    with self.assertRaisesRegex(ValueError, expected_message):
        timer.stop('all')
    timer.stop('a')
    timer.stop('all')

    a_percent = timer.get_relative_percent_time('all.a')
    aa_percent = timer.get_relative_percent_time('all.a.aa')
    aa_total_percent = timer.get_total_percent_time('all.a.aa')

    # The total share of 'aa' is the product of the relative shares along
    # its path, expressed as a percentage.
    self.assertAlmostEqual(aa_total_percent,
                           a_percent / 100 * aa_percent / 100 * 100)

    self.assertAlmostEqual(timer.get_num_calls('all.a'), 11)
    self.assertAlmostEqual(timer.get_num_calls('all.a.ab'), 10)
    self.assertAlmostEqual(timer.get_num_calls('all.a.aa'), 50)

    # Only checks that these calls run without raising.
    timer.get_total_time('all.b')
    print(timer)
def do_numeric_factorization(
        self,
        matrix: MPIBlockMatrix,
        raise_on_error: bool = True,
        timer: Optional[HierarchicalTimer] = None) -> LinearSolverResults:
    """
    Perform numeric factorization:
      * perform numeric factorization on each diagonal block
      * form and communicate the Schur-Complement
      * factorize the schur-complement

    This method should only be called after do_symbolic_factorization.

    Parameters
    ----------
    matrix: MPIBlockMatrix
        A Pynumero MPIBlockMatrix. This is the A matrix in Ax=b
    raise_on_error: bool
        If False, an error will not be raised if an error occurs during
        numeric factorization. Instead the status attribute of the results
        object will indicate an error occurred. The flag is also forwarded
        to both factorization calls on the Schur-complement solver.
    timer: pyutilib.misc.timing.HierarchicalTimer
        A timer for profiling. Every timer started by this method is
        stopped again on each return path and before the RuntimeError
        below is raised.

    Returns
    -------
    res: LinearSolverResults
        The results object

    Raises
    ------
    RuntimeError
        If a diagonal block factorization is unsuccessful and
        ``raise_on_error`` is True.
    """
    if timer is None:
        timer = HierarchicalTimer()
    timer.start('numeric')

    self.block_matrix = block_matrix = matrix
    acceptable = {LinearSolverStatus.successful, LinearSolverStatus.warning}

    res = LinearSolverResults()
    res.status = LinearSolverStatus.successful
    timer.start('form SC')
    for ndx in self.local_block_indices:
        timer.start('factorize')
        sub_res = self.subproblem_solvers[ndx].do_numeric_factorization(
            matrix=block_matrix.get_block(ndx, ndx), raise_on_error=False)
        timer.stop('factorize')
        _process_sub_results(res, sub_res)
        if res.status not in acceptable:
            break

    res = _gather_results(res)
    if res.status not in acceptable:
        # Close the open timers first so a caller-supplied timer is not left
        # with 'numeric' / 'form SC' still active.
        timer.stop('form SC')
        timer.stop('numeric')
        if raise_on_error:
            raise RuntimeError(
                'Numeric factorization unsuccessful; status: ' +
                str(res.status))
        return res

    # in a scipy csr_matrix,
    #     data contains the values
    #     indices contains the column indices
    #     indptr[i]:indptr[i + 1] is the slice of data/indices holding row i
    self.schur_complement.data = np.zeros(self.schur_complement.data.size,
                                          dtype=np.double)
    for ndx in self.local_block_indices:
        A = block_matrix.get_block(self.block_dim - 1, ndx).tocsr()
        _rhs = np.zeros(A.shape[1], dtype=np.double)
        solver = self.subproblem_solvers[ndx]
        for row_ndx in range(A.shape[0]):
            row_start = A.indptr[row_ndx]
            row_stop = A.indptr[row_ndx + 1]
            if row_start == row_stop:
                # Row without stored entries: nothing to contribute.
                continue

            # Load row ``row_ndx`` of A into the dense work vector.
            for k in range(row_start, row_stop):
                _rhs[A.indices[k]] += A.data[k]

            timer.start('back solve')
            contribution = solver.do_back_solve(_rhs)
            timer.stop('back solve')
            timer.start('dot product')
            contribution = A.dot(contribution)
            timer.stop('dot product')

            # Subtract the nonzero entries from column ``row_ndx`` of the
            # Schur-complement, using the coordinate -> value index map.
            sc_col = row_ndx
            for sc_row in contribution.nonzero()[0]:
                val_ndx = self.sc_coordinate_to_value_ndx_map[(sc_row, sc_col)]
                self.schur_complement.data[val_ndx] -= contribution[sc_row]

            # Undo the load so the work vector can be reused for the next row.
            for k in range(row_start, row_stop):
                _rhs[A.indices[k]] -= A.data[k]

    timer.start('communicate')
    sc = np.zeros(self.schur_complement.data.size, dtype=np.double)
    comm.Allreduce(self.schur_complement.data, sc)
    self.schur_complement.data = sc
    sc = self.schur_complement + block_matrix.get_block(
        self.block_dim - 1, self.block_dim - 1)
    timer.stop('communicate')
    timer.stop('form SC')

    timer.start('factor SC')
    sub_res = self.schur_complement_solver.do_symbolic_factorization(
        sc, raise_on_error=raise_on_error)
    _process_sub_results(res, sub_res)
    if res.status in acceptable:
        # Forward raise_on_error so that a failed Schur-complement
        # factorization is reported through ``res.status`` when the caller
        # asked for no exceptions.
        sub_res = self.schur_complement_solver.do_numeric_factorization(
            sc, raise_on_error=raise_on_error)
        _process_sub_results(res, sub_res)
    timer.stop('factor SC')
    timer.stop('numeric')

    return res
def do_symbolic_factorization(
        self,
        matrix: MPIBlockMatrix,
        raise_on_error: bool = True,
        timer: Optional[HierarchicalTimer] = None) -> LinearSolverResults:
    """
    Perform symbolic factorization. This performs symbolic factorization for
    each diagonal block and collects some information on the structure of
    the schur complement for sparse communication in the numeric
    factorization phase.

    Parameters
    ----------
    matrix: MPIBlockMatrix
        A Pynumero MPIBlockMatrix. This is the A matrix in Ax=b
    raise_on_error: bool
        If False, an error will not be raised if an error occurs during
        symbolic factorization. Instead the status attribute of the results
        object will indicate an error occurred.
    timer: pyutilib.misc.timing.HierarchicalTimer
        A timer for profiling. Every timer started by this method is
        stopped again on each return path and before the RuntimeError
        below is raised.

    Returns
    -------
    res: LinearSolverResults
        The results object

    Raises
    ------
    ValueError
        If the matrix is not square, either in blocks or in scalar entries.
    RuntimeError
        If a diagonal block factorization is unsuccessful and
        ``raise_on_error`` is True.
    """
    if timer is None:
        timer = HierarchicalTimer()
    timer.start('symbolic')

    block_matrix = matrix
    nbrows, nbcols = block_matrix.bshape
    if nbrows != nbcols:
        raise ValueError('The block matrix provided is not square.')
    self.block_dim = nbrows

    nrows, ncols = block_matrix.shape
    if nrows != ncols:
        raise ValueError('The block matrix provided is not square.')
    self.dim = nrows

    # split up the blocks between ranks
    self.local_block_indices = list()
    self.block_indices_by_rank = {_rank: list() for _rank in range(size)}
    for ndx in range(self.block_dim - 1):
        owner = block_matrix.rank_ownership[ndx, ndx]
        if owner == -1:
            # An ownership of -1 is handled by rank 0 (this is the rule the
            # local index list has always used). Booking the block under
            # rank 0 here as well avoids a KeyError, since the dictionary
            # only has keys 0 .. size - 1.
            # NOTE(review): confirm that consumers of block_indices_by_rank
            # expect such blocks under rank 0.
            owner = 0
        self.block_indices_by_rank[owner].append(ndx)
        if owner == rank:
            self.local_block_indices.append(ndx)

    acceptable = {LinearSolverStatus.successful, LinearSolverStatus.warning}

    res = LinearSolverResults()
    res.status = LinearSolverStatus.successful
    timer.start('factorize')
    for ndx in self.local_block_indices:
        sub_res = self.subproblem_solvers[ndx].do_symbolic_factorization(
            matrix=block_matrix.get_block(ndx, ndx), raise_on_error=False)
        _process_sub_results(res, sub_res)
        if res.status not in acceptable:
            break
    timer.stop('factorize')

    res = _gather_results(res)
    if res.status not in acceptable:
        # Close the open timer first so a caller-supplied timer is not left
        # with 'symbolic' still active.
        timer.stop('symbolic')
        if raise_on_error:
            raise RuntimeError(
                'Symbolic factorization unsuccessful; status: ' +
                str(res.status))
        return res

    timer.start('sc_structure')
    self._get_sc_structure(block_matrix=block_matrix)
    timer.stop('sc_structure')

    timer.stop('symbolic')

    return res