def solve(self, problem: Problem, init_vars: np.ndarray = None, show_process: bool = False):
    """Minimise ``problem.aim_func`` by gradient descent with a Barzilai-Borwein step.

    The search direction is always the negative gradient.  The first
    iteration chooses its step length by Armijo backtracking (start at
    ``self.s``, multiply by ``self.sigma`` until the decrease is at least
    ``self.gamma * alpha * grad^T d``).  Later iterations use the
    Barzilai-Borwein quotient ``alpha = s^T y / y^T y`` where ``s`` and ``y``
    are the last change of the iterate and of the gradient.  If that quotient
    is undefined (``y == 0``), non-finite or not positive, the iteration
    falls back to Armijo backtracking instead of taking a NaN, zero or
    uphill step.

    Args:
        problem: Object providing ``dim``, ``aim_func``, ``cal_gradient`` and
            ``get_all_surface_points``.
        init_vars: Starting point; drawn from ``np.random.rand(problem.dim)``
            when omitted.
        show_process: Print the gradient norm and wall time of every
            iteration.

    Returns:
        Tuple ``(vars, gradient, objective, gradient_norm, iterations)`` at
        the final iterate.  The run history is appended to
        ``self.cpu_time_arr``, ``self.f_value_arr`` and ``self.g_norm_arr``;
        ``self.final_solution`` and ``self.surface_points`` are set as well.
    """
    if init_vars is None:
        init_vars = np.random.rand(problem.dim)
    self.reset()
    start = time.time()

    vars_current = init_vars
    vars_before = vars_current
    grad_value = problem.cal_gradient(vars_current)
    grad_before = grad_value
    ng = np.linalg.norm(grad_value)
    objv_current = problem.aim_func(vars_current)

    # Record the starting point.
    self.cpu_time_arr.append(0)
    self.f_value_arr.append(objv_current)
    self.g_norm_arr.append(ng)

    iter_count = 0
    while ng > self.tol and iter_count < self.max_iter:
        current = time.time()
        d_k = -grad_value

        # No previous iterate exists on the first pass, so no BB step either.
        backtrack = iter_count == 0
        if not backtrack:
            sk = vars_current - vars_before
            yk = grad_value - grad_before
            yk_sq = yk.T @ yk
            alpha = sk.T @ yk / yk_sq if yk_sq > 0 else np.nan
            # A NaN/inf/non-positive step would stall or move uphill.
            backtrack = not (np.isfinite(alpha) and alpha > 0)

        objv_next = None
        if backtrack:
            alpha = self.s
            slope = grad_value @ d_k
            objv_next = problem.aim_func(vars_current + alpha * d_k)
            while np.isnan(objv_next) or (
                    (objv_next - objv_current) > self.gamma * alpha * slope):
                alpha *= self.sigma
                objv_next = problem.aim_func(vars_current + alpha * d_k)

        grad_before = grad_value
        vars_before = vars_current
        vars_current = vars_current + alpha * d_k
        # The backtracking branch already evaluated the accepted point.
        if backtrack:
            objv_current = objv_next
        else:
            objv_current = problem.aim_func(vars_current)
        grad_value = problem.cal_gradient(vars_current)
        ng = np.linalg.norm(grad_value)
        iter_count += 1

        if show_process:
            print(f'iteration {iter_count} : {round(ng, 8)}', end=' ')
            print(f'time: {round(time.time() - current, 3)} second(s)')

        self.cpu_time_arr.append(time.time() - start)
        self.f_value_arr.append(objv_current)
        self.g_norm_arr.append(ng)

    self.final_solution = vars_current
    self.surface_points = problem.get_all_surface_points(vars_current)
    return vars_current, grad_value, objv_current, ng, iter_count
def solve(self, problem: Problem, init_vars: np.ndarray = None, show_process: bool = False):
    """Minimise ``problem.aim_func`` with a globalised Newton method.

    Each iteration solves ``hessian @ s_k = -gradient``.  The Newton
    direction ``s_k`` is used only if the system is solvable, the result is
    finite, and it passes the descent test
    ``-grad^T s_k >= min(beta1, beta2 * norm(s_k, p)) * ||s_k||^2``;
    otherwise the negative gradient is used.  The step length comes from
    Armijo backtracking (start ``self.s``, shrink factor ``self.sigma``,
    sufficient-decrease factor ``self.gamma``).

    Args:
        problem: Object providing ``dim``, ``aim_func``, ``cal_gradient``,
            ``cal_hessian`` and ``get_all_surface_points``.
        init_vars: Starting point; drawn from ``np.random.rand(problem.dim)``
            when omitted.
        show_process: Print the gradient norm and wall time of every
            iteration.

    Returns:
        Tuple ``(vars, gradient, objective, gradient_norm, iterations)`` at
        the final iterate.  The run history is appended to
        ``self.cpu_time_arr``, ``self.f_value_arr`` and ``self.g_norm_arr``;
        ``self.final_solution`` and ``self.surface_points`` are set as well.
    """
    if init_vars is None:
        init_vars = np.random.rand(problem.dim)
    self.reset()
    start = time.time()

    vars_current = init_vars
    objv_current = problem.aim_func(vars_current)
    grad_value = problem.cal_gradient(vars_current)
    ng = np.linalg.norm(grad_value)

    # Record the starting point.
    self.cpu_time_arr.append(0)
    self.f_value_arr.append(objv_current)
    self.g_norm_arr.append(ng)

    iter_count = 0
    while ng > self.tol and iter_count < self.max_iter:
        current = time.time()

        # INNER STEP1: Newton direction if usable, else steepest descent.
        hessian = problem.cal_hessian(vars_current)
        d_k = -grad_value
        try:
            # Solving the linear system avoids forming the inverse and does
            # not depend on the (scale-sensitive) size of the determinant.
            s_k = -np.linalg.solve(hessian, grad_value)
        except np.linalg.LinAlgError:
            s_k = None
        if s_k is not None and np.all(np.isfinite(s_k)):
            # NOTE(review): this uses the ``self.p``-norm of s_k; the usual
            # globalised-Newton test uses ``||s_k|| ** p`` - confirm which
            # one is intended.
            threshold = min(
                self.beta1,
                self.beta2 * np.linalg.norm(s_k, self.p),
            ) * np.linalg.norm(s_k) ** 2
            if -grad_value @ s_k >= threshold:
                d_k = s_k

        # INNER STEP2: Armijo backtracking along d_k.
        alpha = self.s
        objv_next = problem.aim_func(vars_current + alpha * d_k)
        while np.isnan(objv_next) or (
                (objv_next - objv_current) > (self.gamma * alpha * grad_value @ d_k)):
            alpha *= self.sigma
            objv_next = problem.aim_func(vars_current + alpha * d_k)

        vars_current = vars_current + alpha * d_k
        # objv_next was evaluated at exactly this point; no need to recompute.
        objv_current = objv_next
        grad_value = problem.cal_gradient(vars_current)
        ng = np.linalg.norm(grad_value)
        iter_count += 1

        if show_process:
            print(f'iteration {iter_count} : {round(ng, 8)}', end=' ')
            print(f'time: {round(time.time() - current, 3)} second(s)')

        self.cpu_time_arr.append(time.time() - start)
        self.f_value_arr.append(objv_current)
        self.g_norm_arr.append(ng)

    self.final_solution = vars_current
    self.surface_points = problem.get_all_surface_points(vars_current)
    return vars_current, grad_value, objv_current, ng, iter_count
def solve(self, problem: Problem, init_vars: np.ndarray = None, show_process: bool = False):
    """Minimise ``problem.aim_func`` with a projected-gradient iteration.

    Each iteration builds the direction
    ``d_k = project(x - lambda_k * grad, problem.b_s) - x`` with
    ``lambda_k = self.lambda_at(iter_count)`` and stops as soon as
    ``||d_k|| <= lambda_k * self.tol``.  The step length along ``d_k`` is
    chosen by Armijo backtracking (start ``self.s``, shrink factor
    ``self.sigma``, sufficient-decrease factor ``self.gamma``).

    Args:
        problem: Object providing ``dim``, ``b_s``, ``aim_func``,
            ``cal_gradient``, ``get_all_surface_points`` and
            ``get_all_support_points``.
        init_vars: Starting point; drawn from ``np.random.rand(problem.dim)``
            when omitted.
        show_process: Print the gradient norm and wall time of every
            iteration.

    Returns:
        Tuple ``(vars, gradient, objective, gradient_norm, iterations)`` at
        the final iterate.  The run history is appended to
        ``self.cpu_time_arr``, ``self.f_value_arr`` and ``self.g_norm_arr``;
        ``self.final_solution``, ``self.surface_points`` and
        ``self.support_points`` are set as well.
    """
    if init_vars is None:
        init_vars = np.random.rand(problem.dim)
    self.reset()
    start = time.time()

    vars_current = init_vars
    grad_value = problem.cal_gradient(vars_current)
    ng = np.linalg.norm(grad_value)
    objv_current = problem.aim_func(vars_current)

    # Record the starting point.
    self.cpu_time_arr.append(0)
    self.f_value_arr.append(objv_current)
    self.g_norm_arr.append(ng)

    iter_count = 0
    while iter_count < self.max_iter:
        current = time.time()

        # INNER STEP1: projected-gradient direction and stopping test.
        lambda_val = self.lambda_at(iter_count)
        d_k = project(vars_current - lambda_val * grad_value, problem.b_s) - vars_current
        if np.linalg.norm(d_k) <= lambda_val * self.tol:
            break

        # INNER STEP2: Armijo backtracking along d_k.
        alpha = self.s
        objv_next = problem.aim_func(vars_current + alpha * d_k)
        while np.isnan(objv_next) or (
                (objv_next - objv_current) > (self.gamma * alpha * grad_value @ d_k)):
            alpha *= self.sigma
            objv_next = problem.aim_func(vars_current + alpha * d_k)

        vars_current = vars_current + alpha * d_k
        # objv_next was evaluated at exactly this point; no need to recompute.
        objv_current = objv_next
        grad_value = problem.cal_gradient(vars_current)
        ng = np.linalg.norm(grad_value)
        iter_count += 1

        if show_process:
            print(f'iteration {iter_count} : {round(ng, 8)}', end=' ')
            print(f'time: {round(time.time() - current, 3)} second(s)')

        self.cpu_time_arr.append(time.time() - start)
        self.f_value_arr.append(objv_current)
        self.g_norm_arr.append(ng)

    self.final_solution = vars_current
    self.surface_points = problem.get_all_surface_points(vars_current)
    self.support_points = problem.get_all_support_points()
    return vars_current, grad_value, objv_current, ng, iter_count
def solve(self, problem: Problem, init_vars: np.ndarray = None, show_process: bool = False): if init_vars is None: init_vars = np.random.rand(problem.dim) self.reset() start = time.time() # Step1: calculate the gradient and its norm named 'ng' vars_current = init_vars grad_value = problem.cal_gradient(vars_current) ng = np.linalg.norm(grad_value) hessian = problem.cal_hessian(vars_current) # ng_seq = [] # Step2: Iteration objv_current = problem.aim_func(vars_current) iter_count = 0 # Record self.cpu_time_arr.append(0) self.f_value_arr.append(objv_current) self.g_norm_arr.append(ng) while ng > self.tol and iter_count < self.max_iter: current = time.time() # INNER STEP1: get d_k d_k = -grad_value # INNER STEP2: get a_k #alpha = self.s # initial alpha as s alpha = -grad_value.T * d_k / (d_k.T @ hessian @ d_k) # print(alpha) vars_current = vars_current + alpha * d_k # Update vars objv_current = problem.aim_func(vars_current) grad_value = problem.cal_gradient(vars_current) ng = np.linalg.norm(grad_value) iter_count += 1 # ng_seq.append(ng) if show_process: print(f'iteration {iter_count} : {round(ng, 8)}', end=' ') print( f'time: {round(time.time() - current, 3)} second(self.s)') self.cpu_time_arr.append(time.time() - start) self.f_value_arr.append(objv_current) self.g_norm_arr.append(ng) self.final_solution = vars_current self.surface_points = problem.get_all_surface_points(vars_current) return vars_current, grad_value, objv_current, ng, iter_count
def solve(self, problem: Problem, init_vars: np.ndarray = None, show_process: bool = False):
    """Minimise ``problem.aim_func`` with limited-memory BFGS.

    The direction comes from the two-loop recursion over the most recent
    ``self.m`` pairs ``(s, y)`` of iterate and gradient differences, scaled
    by ``s^T y / (y^T y + 1e-8)`` of the newest pair.  With an empty history
    (first iteration), or when the recursion does not yield a descent
    direction, the negative gradient is used.  The step length comes from
    Armijo backtracking (start ``self.s``, shrink factor ``self.sigma``,
    sufficient-decrease factor ``self.gamma``) measured against the true
    directional derivative ``grad^T d_k``.

    Every step, including the first one, is counted in the returned
    iteration number and appended to the run history.

    Args:
        problem: Object providing ``dim``, ``aim_func``, ``cal_gradient`` and
            ``get_all_surface_points``.
        init_vars: Starting point; drawn from ``np.random.rand(problem.dim)``
            when omitted.
        show_process: Print the gradient norm and wall time of every
            iteration.

    Returns:
        Tuple ``(vars, gradient, objective, gradient_norm, iterations)`` at
        the final iterate.  The run history is appended to
        ``self.cpu_time_arr``, ``self.f_value_arr`` and ``self.g_norm_arr``;
        ``self.final_solution`` and ``self.surface_points`` are set as well.
    """
    if init_vars is None:
        init_vars = np.random.rand(problem.dim)
    self.reset()
    start = time.time()

    vars_current = init_vars
    grad_value = problem.cal_gradient(vars_current)
    ng = np.linalg.norm(grad_value)
    objv_current = problem.aim_func(vars_current)

    # Record the starting point.
    self.cpu_time_arr.append(0)
    self.f_value_arr.append(objv_current)
    self.g_norm_arr.append(ng)

    # Always keep the newest pair: it provides the initial scaling.
    history_size = max(self.m, 1)
    s_list = []
    y_list = []
    rho_list = []

    iter_count = 0
    while ng > self.tol and iter_count < self.max_iter:
        current = time.time()

        # INNER STEP1: two-loop recursion (steepest descent without history).
        d_k = -grad_value
        if s_list:
            used = min(self.m, len(s_list))
            q = grad_value
            a_list = []
            for i in range(1, used + 1):  # newest pair first
                a = rho_list[-i] * (s_list[-i] @ q)
                a_list.append(a)
                q = q - a * y_list[-i]
            scale = (s_list[-1] @ y_list[-1]) / ((y_list[-1] @ y_list[-1]) + 1e-8)
            r = scale * q
            for i in range(used, 0, -1):  # oldest of the used pairs first
                beta = rho_list[-i] * (y_list[-i] @ r)
                r = r + (a_list[i - 1] - beta) * s_list[-i]
            d_k = -r

        slope = grad_value @ d_k
        if not slope < 0:
            # Backtracking alone does not guarantee positive curvature pairs,
            # so the recursion can produce a non-descent (or NaN) direction.
            d_k = -grad_value
            slope = grad_value @ d_k

        # INNER STEP2: Armijo backtracking along d_k.
        alpha = self.s
        objv_next = problem.aim_func(vars_current + alpha * d_k)
        while np.isnan(objv_next) or (
                objv_next - objv_current > self.gamma * alpha * slope):
            alpha *= self.sigma
            objv_next = problem.aim_func(vars_current + alpha * d_k)

        # Update the iterate and the curvature history.
        step = alpha * d_k
        vars_next = vars_current + step
        grad_next = problem.cal_gradient(vars_next)
        grad_diff = grad_next - grad_value
        s_list.append(step)
        y_list.append(grad_diff)
        rho_list.append(1 / (step @ grad_diff + 1e-8))
        if len(s_list) > history_size:
            del s_list[0]
            del y_list[0]
            del rho_list[0]

        vars_current = vars_next
        objv_current = objv_next
        grad_value = grad_next
        ng = np.linalg.norm(grad_value)
        iter_count += 1

        self.cpu_time_arr.append(time.time() - start)
        self.f_value_arr.append(objv_current)
        self.g_norm_arr.append(ng)

        if show_process:
            print(f'iteration {iter_count} : {round(ng, 8)}', end=' ')
            print(f'time: {round(time.time() - current, 3)} second(s)')

    self.final_solution = vars_current
    self.surface_points = problem.get_all_surface_points(vars_current)
    return vars_current, grad_value, objv_current, ng, iter_count