def objective_l2_sq(beta, game_matrix_list, l_penalty):
    '''
    compute the objective of the model (neg_log_like + squared l2)
    ----------
    Input:
    beta: TxN array or a TN vector
    game_matrix_list: TxNxN array
    l_penalty: coefficient of the penalty term
    ----------
    Output:
    objective: negative log likelihood + squared l2 penalty
    '''
    # reshape beta into TxN array
    T, N = game_matrix_list.shape[0:2]
    beta = np.reshape(beta, [T, N])
    # compute squared l2 penalty on successive differences
    l2_penalty = np.sum(np.square(beta[:-1] - beta[1:]))
    return model.neg_log_like(beta, game_matrix_list) + l_penalty * l2_penalty
def objective_l1(beta, game_matrix_list, l_penalty):
    '''
    compute the objective of the model (neg_log_like + l1)
    ----------
    Input:
    beta: TxN array or a TN vector
    game_matrix_list: TxNxN array
    l_penalty: coefficient of the penalty term
    ----------
    Output:
    objective: negative log likelihood + l1 penalty
    '''
    # reshape beta into TxN array
    T, N = game_matrix_list.shape[0:2]
    beta = np.reshape(beta, [T, N])
    # compute l1 penalty on successive differences
    diff = beta[:-1] - beta[1:]
    l1_penalty = np.sum(np.abs(diff))
    return model.neg_log_like(beta, game_matrix_list) + l_penalty * l1_penalty
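# `prox_l1` is called by `admm_l1` below but is not defined in this section.
# The following is a minimal sketch of the standard soft-thresholding operator
# it presumably implements; the signature is inferred from the call site
# prox_l1(l_penalty / eta, A @ beta + muk / eta) and is an assumption.
def prox_l1_sketch(t, v):
    # proximal operator of t * ||.||_1: elementwise soft-thresholding
    return np.sign(v) * np.maximum(np.abs(v) - t, 0.0)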
def admm_l1(data, l_penalty=1, max_iter=1000, ths=1e-12, eta=None,
            step_init=1, max_back=200, a=0.01, b=0.3, beta_init=None,
            verbose=False, out=sys.stdout, return_b_obj=False):
    # initialize optimization
    T, N = data.shape[0:2]
    if beta_init is None:
        beta = np.zeros(data.shape[:2]).reshape((N * T, 1))
    else:
        beta = beta_init.reshape((N * T, 1))
    # optimization parameters passed to the beta subproblem
    paras = [step_init, ths, max_iter, max_back, a, b]
    # difference operator: (A @ beta)[t * N + i] = beta[t + 1, i] - beta[t, i]
    A = np.zeros(((T - 1) * N, T * N))
    for i in range(N):
        for t in range(T - 1):
            A[t * N + i, (t + 1) * N + i] = 1
            A[t * N + i, t * N + i] = -1
    if eta is None:
        eta = 20 * l_penalty
    # ADMM variables: thetak is the split variable (thetak ~ A @ beta),
    # muk is the dual variable
    thetak = np.zeros(((T - 1) * N, 1))
    muk = A @ beta - thetak
    # initialize record
    objective_admm_b_l1 = [objective_l1(beta, data, l_penalty)]
    objective_admm = [
        model.neg_log_like(beta, data)
        + l_penalty * np.linalg.norm(thetak, 1)
    ]
    if verbose:
        out.write("initial objective value: %f\n" % objective_admm[-1])
        out.flush()
    # iteration
    for i in range(max_iter):
        # beta update: minimize the augmented Lagrangian in beta
        beta = admm_sub_beta(data, T, N, A, l_penalty, eta, beta, muk,
                             thetak, paras, out)
        # theta update: proximal step (soft-thresholding)
        thetak = prox_l1(l_penalty / eta, A @ beta + muk / eta)
        # dual update
        muk = muk + eta * (A @ beta - thetak)
        # record objective values
        objective_admm_b_l1.append(objective_l1(beta, data, l_penalty))
        objective_admm.append(
            model.neg_log_like(beta, data)
            + l_penalty * np.linalg.norm(thetak, 1))
        if verbose:
            out.write("%d-th ADMM, objective value: %f\n"
                      % (i + 1, objective_admm[-1]))
            out.flush()
        if objective_admm[-2] - objective_admm[-1] < ths:
            if verbose:
                out.write("Converged!\n")
                out.flush()
            break
        elif i >= max_iter - 1:
            if verbose:
                out.write("Not converged.\n")
                out.flush()
    # center beta so the first time step has mean zero (identifiability)
    beta = beta - sum(beta[0:N]) / N
    beta = beta.reshape((T, N))
    if return_b_obj:
        return (objective_admm, objective_admm_b_l1), beta
    return objective_admm, beta
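# Example usage (a sketch; the synthetic data below is illustrative only and
# not taken from the source):
#
#   T, N = 5, 4
#   rng = np.random.default_rng(0)
#   # data[t, i, j] = number of wins of player i over player j at time t
#   data = rng.integers(0, 3, size=(T, N, N)).astype(float)
#   obj_trace, beta_hat = admm_l1(data, l_penalty=1.0, max_iter=200)
#   # beta_hat is the TxN matrix of estimated time-varying player scores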
def obj_amlag(beta, data, A, muk, eta, thetak):
    # augmented Lagrangian of the ADMM problem as a function of beta; the
    # constant term -muk.T @ thetak is dropped since it does not depend on beta
    return model.neg_log_like(beta, data) + (A @ beta).T @ muk + \
        eta / 2 * np.linalg.norm(A @ beta - thetak) ** 2
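# `admm_sub_beta` (the beta-update subproblem in `admm_l1`) is not defined in
# this section. Below is a minimal sketch of one plausible implementation:
# gradient descent on the augmented Lagrangian `obj_amlag` with backtracking
# line search. The signature and the layout of `paras`
# ([step_init, ths, max_iter, max_back, a, b]) are read off the call site in
# `admm_l1`; the update rule itself is an assumption, not the author's code.
def admm_sub_beta_sketch(data, T, N, A, l_penalty, eta, beta, muk, thetak,
                         paras, out):
    step_init, ths, max_iter, max_back, a, b = paras
    # reduce obj_amlag's 1x1 result to a Python float
    obj = np.asarray(obj_amlag(beta, data, A, muk, eta, thetak)).item()
    for _ in range(max_iter):
        # gradient of the augmented Lagrangian with respect to beta
        grad = (model.grad_nl(beta, data).reshape(beta.shape)
                + A.T @ muk + eta * A.T @ (A @ beta - thetak))
        # backtracking line search with Armijo sufficient decrease
        s = step_init
        for _ in range(max_back):
            beta_new = beta - s * grad
            obj_new = np.asarray(
                obj_amlag(beta_new, data, A, muk, eta, thetak)).item()
            if obj_new <= obj - a * s * np.sum(grad ** 2):
                break
            s *= b
        if abs(obj - obj_new) < ths:
            beta, obj = beta_new, obj_new
            break
        beta, obj = beta_new, obj_new
    return beta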
def pgd_l2_sq(data, l_penalty=1, max_iter=1000, ths=1e-12, step_init=0.5,
              max_back=200, a=0.2, b=0.5, beta_init=None, verbose=False,
              out=sys.stdout):
    # initialize optimization
    T, N = data.shape[0:2]
    if beta_init is None:
        beta = np.zeros(data.shape[:2])
    else:
        beta = beta_init
    nll = model.neg_log_like(beta, data)
    # initialize record
    objective_wback = [objective_l2_sq(beta, data, l_penalty)]
    if verbose:
        out.write("initial objective value: %f\n" % objective_wback[-1])
        out.flush()
    # iteration
    for i in range(max_iter):
        # compute gradient of the negative log likelihood
        gradient = model.grad_nl(beta, data).reshape([T, N])
        # backtracking line search
        s = step_init
        for j in range(max_back):
            beta_new = prox_l2_sq(beta - s * gradient, s, l_penalty)
            beta_diff = beta_new - beta
            nll_new = model.neg_log_like(beta_new, data)
            # sufficient-decrease bound for the proximal gradient step
            nll_back = (nll + np.sum(gradient * beta_diff)
                        + np.sum(np.square(beta_diff)) / (2 * s))
            if nll_new <= nll_back:
                break
            s *= b
        # proximal gradient update
        beta = beta_new
        nll = nll_new
        # record objective value
        objective_wback.append(objective_l2_sq(beta, data, l_penalty))
        if verbose:
            out.write("%d-th PGD, objective value: %f\n"
                      % (i + 1, objective_wback[-1]))
            out.flush()
        if abs(objective_wback[-2] - objective_wback[-1]) < ths:
            if verbose:
                out.write("Converged!\n")
                out.flush()
            break
        elif i >= max_iter - 1:
            if verbose:
                out.write("Not converged.\n")
                out.flush()
    return objective_wback, beta
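# `prox_l2_sq` is used by `pgd_l2_sq` above but is not defined in this section.
# A minimal sketch of what it presumably computes: the proximal operator of
# f(x) = l_penalty * sum_t ||x[t+1] - x[t]||^2, i.e. the minimizer of
# ||x - v||_F^2 / (2 * s) + f(x), obtained by solving the linear system
# (I + 2 * s * l_penalty * D.T @ D) x = v with D the first-difference matrix.
# The signature is inferred from the call site
# prox_l2_sq(beta - s * gradient, s, l_penalty) and is an assumption.
def prox_l2_sq_sketch(v, s, l_penalty):
    # v: TxN array; the penalty only couples rows (time steps), so one
    # linear solve handles all N columns (players) at once
    T = v.shape[0]
    D = np.eye(T - 1, T, k=1) - np.eye(T - 1, T)  # (D @ x)[t] = x[t+1] - x[t]
    return np.linalg.solve(np.eye(T) + 2 * s * l_penalty * (D.T @ D), v)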