def __init__(self, D_in, D_out, a_bound):
    """Create the affine weights and the differentiable projection layer.

    Args:
        D_in: size of the layer input.
        D_out: size of the layer output.
        a_bound: element-wise upper bound applied to the output.

    The layer stored in ``self.layer`` solves::

        minimize   || Wtilde @ x - b - y ||^2
        subject to sum(y) == env.nbikes, 0 <= y <= a_bound, Wtilde == W

    and takes ``(W, b, x)`` as parameters, returning ``y``. ``env`` is read
    from the enclosing module scope.
    """
    super(OptLayer, self).__init__()

    # Small random initial values for the learnable weights and bias.
    self.W = torch.nn.Parameter(1e-3 * torch.randn(D_out, D_in))
    self.b = torch.nn.Parameter(1e-3 * torch.randn(D_out))

    upper = torch.as_tensor(a_bound)

    out_var = cp.Variable(D_out)
    # The weights enter through a variable pinned to the parameter
    # (presumably to keep the problem DPP -- a product of two parameters
    # would not be; TODO confirm).
    weight_var = cp.Variable((D_out, D_in))
    weight_par = cp.Parameter((D_out, D_in))
    bias_par = cp.Parameter(D_out)
    input_par = cp.Parameter(D_in)

    residual = weight_var @ input_par - bias_par - out_var
    feasible_set = [
        cp.sum(out_var) == env.nbikes,
        0 <= out_var,
        out_var <= upper,
        weight_var == weight_par,
    ]
    projection = cp.Problem(cp.Minimize(cp.sum_squares(residual)), feasible_set)
    self.layer = CvxpyLayer(projection, [weight_par, bias_par, input_par], [out_var])
def projection_eu(self, o, d):
    """Project one OD pair's gradient step onto its feasible path flows.

    Looks up the serial index ``k`` of the pair ``(o, d)`` in
    ``self.od_serial`` and takes the step ``self.f[k] - self.r * self.cost[k]``.
    The step is projected (Euclidean norm) onto
    ``{flow >= 0, sum(flow) == self.q[k]}`` with a ``CvxpyLayer``.

    Args:
        o: first index into ``self.od_serial``.
        d: second index into ``self.od_serial``.

    Returns:
        ``self.f[k]`` unchanged when the pair has a single path; otherwise
        the projected flows, clamped at zero to remove small negative
        values left by the solver.
    """
    r = self.r
    k = self.od_serial[o, d]
    k_idx = int(k)
    path_flow = self.f[k_idx]
    n_paths = len(path_flow)

    # With a single path there is nothing to redistribute.
    if n_paths == 1:
        return path_flow

    flow = cp.Variable(n_paths)
    b = cp.Parameter(n_paths)
    # cp.sum builds one vectorised atom; the builtin sum() would chain
    # n_paths scalar index/add expressions for the same constraint.
    constraints = [flow >= 0, cp.sum(flow) == self.q[k]]
    objective = cp.Minimize(cp.pnorm(flow - b, p=2))
    problem = cp.Problem(objective, constraints)
    assert problem.is_dpp()

    b_tch = path_flow - r * self.cost[k_idx]
    cvxpylayer = CvxpyLayer(problem, parameters=[b], variables=[flow])
    solution, = cvxpylayer(b_tch)
    return solution.clamp(0)
def __init__(self, n_assets, temperature=1, max_weight=1):
    """Store the settings and build the projection layer.

    Args:
        n_assets: length of the score / weight vectors.
        temperature: stored on the instance as ``self.temperature``.
        max_weight: upper bound applied to every weight.

    Raises:
        ValueError: if ``n_assets * max_weight < 1``, i.e. the weights
            cannot sum to one under the cap.
    """
    super().__init__()

    if n_assets * max_weight < 1:
        raise ValueError('One cannot create fully invested portfolio with the given max_weight')

    self.n_assets = n_assets
    self.temperature = temperature

    # Euclidean projection of the scores onto the capped simplex.
    scores = cp.Parameter(n_assets)
    weights = cp.Variable(n_assets)
    distance = cp.sum_squares(scores - weights)
    capped_simplex = [cp.sum(weights) == 1, 0. <= weights, weights <= max_weight]
    projection = cp.Problem(cp.Minimize(distance), capped_simplex)

    self.layer = CvxpyLayer(projection, parameters=[scores], variables=[weights])
def __init__(self, n_assets, max_weight=1):
    """Construct.

    Builds a ``CvxpyLayer`` (stored as ``self.cvxpylayer``) with parameters
    ``(covmat_sqrt, b)`` and variable ``w`` that minimises
    ``0.5 * ||covmat_sqrt @ w||^2 - b @ log(w)`` subject to
    ``sum(w) == 1`` and ``0 <= w <= max_weight``.

    Args:
        n_assets: length of the weight vector.
        max_weight: upper bound applied to every weight.
    """
    super().__init__()

    sqrt_cov = cp.Parameter((n_assets, n_assets))
    budgets = cp.Parameter(n_assets, nonneg=True)
    weights = cp.Variable(n_assets)

    variance_term = 0.5 * cp.sum_squares(sqrt_cov @ weights)
    log_term = budgets @ cp.log(weights)
    objective = cp.Minimize(variance_term - log_term)  # refer [2]

    constraint = [cp.sum(weights) == 1, weights >= 0, weights <= max_weight]  # refer [2]

    problem = cp.Problem(objective, constraint)
    assert problem.is_dpp()

    self.cvxpylayer = CvxpyLayer(problem, parameters=[sqrt_cov, budgets], variables=[weights])
def test_example(self):
    """Smoke-test: solve a small l1 problem and back-propagate through it."""
    n_vars, n_rows = 2, 3

    var = cp.Variable(n_vars)
    mat = cp.Parameter((n_rows, n_vars))
    vec = cp.Parameter(n_rows)

    cost = cp.Minimize(0.5 * cp.pnorm(mat @ var - vec, p=1))
    problem = cp.Problem(cost, [var >= 0])
    assert problem.is_dpp()

    layer = CvxpyLayer(problem, parameters=[mat, vec], variables=[var])
    mat_tch = torch.randn(n_rows, n_vars, requires_grad=True)
    vec_tch = torch.randn(n_rows, requires_grad=True)

    # Forward pass: solve the problem for the sampled parameters.
    solution, = layer(mat_tch, vec_tch)

    # Backward pass: gradient of the summed solution w.r.t. both parameters.
    solution.sum().backward()
def ssvr_cvxpy(X, y, hyperparam, idx_train, idx_val):
    """Validation loss of a simplex-constrained SVR and its hypergradient.

    Fits the regression on the training rows through a ``CvxpyLayer`` whose
    parameters are ``(C, epsilon) = (hyperparam[0], hyperparam[1])``, then
    evaluates the mean squared error on the validation rows.

    Args:
        X: design matrix (numpy array).
        y: targets (numpy array).
        hyperparam: the two hyperparameters ``[C, epsilon]``.
        idx_train: row indices used to fit.
        idx_val: row indices used to evaluate.

    Returns:
        ``(val, grad)``: the validation loss as a numpy value and its
        gradient with respect to ``hyperparam`` as a numpy array.
    """
    Xtrain = torch.from_numpy(X[idx_train, :])
    Xval = torch.from_numpy(X[idx_val])
    ytrain = torch.from_numpy(y[idx_train])
    yval = torch.from_numpy(y[idx_val])

    n_samples_train, n_features = Xtrain.shape

    # Decision variables and hyperparameters of the inner problem.
    coef = cp.Variable(n_features)
    slack_up = cp.Variable(n_samples_train)
    slack_down = cp.Variable(n_samples_train)
    C_par = cp.Parameter(nonneg=True)
    eps_par = cp.Parameter(nonneg=True)

    # Ridge term plus the C-weighted slack penalty.
    objective = cp.sum_squares(coef) / 2 + C_par * cp.sum(slack_up + slack_down)

    constraints = [
        ytrain - Xtrain @ coef <= eps_par + slack_up,
        Xtrain @ coef - ytrain <= eps_par + slack_down,
        slack_up >= 0.0,
        slack_down >= 0.0,
        cp.sum(coef) == 1,
        coef >= 0.0,
    ]

    problem = cp.Problem(cp.Minimize(objective), constraints)
    assert problem.is_dpp()

    layer = CvxpyLayer(problem, parameters=[C_par, eps_par], variables=[coef])
    hyperparam_th = torch.tensor(hyperparam, requires_grad=True)
    coef_th, = layer(hyperparam_th[0], hyperparam_th[1])

    # Validation loss and its gradient w.r.t. the hyperparameters.
    val_loss = (Xval @ coef_th - yval).pow(2).mean()
    val_loss.backward()

    return val_loss.detach().numpy(), np.array(hyperparam_th.grad)
def init_QP(self):
    """Set up the matrices Q, G, h for the QP.

    With ``self.QP == "qpth"`` the tensors ``self.Q``, ``self.e`` (empty),
    ``self.G`` and ``self.h`` are stored on ``self.device``. With
    ``self.QP == "cvxpy"`` a ``CvxpyLayer`` with a single linear-term
    parameter is stored in ``self.qp``. Any other value leaves the instance
    untouched.

    In both cases the inequality ``G x <= h`` stacks a forward
    finite-difference matrix ``D`` (step ``self.grid.item()``) and ``-D``
    against a vector of ones.
    """
    n = self.size

    if self.QP == "qpth":
        # Tensors consumed by the qpth library.
        self.Q = 2.0 * torch.eye(n, n, device=self.device)
        self.e = torch.Tensor().to(device=self.device)

        # Forward finite-difference matrix.
        step = self.grid.item()
        diff = torch.zeros(n - 1, n, device=self.device)
        for row in range(n - 1):
            diff[row, row] = -1.0 / step
            diff[row, row + 1] = 1.0 / step

        self.G = torch.cat([diff, -diff], dim=0)
        self.h = torch.ones(2 * (n - 1), device=self.device)

    elif self.QP == "cvxpy":
        # Same QP expressed for the cvxpylayers library.
        quad = 2.0 * np.eye(n)
        lin = cp.Parameter(n)

        # Forward finite-difference matrix.
        step = self.grid.item()
        diff = np.zeros([n - 1, n])
        for row in range(n - 1):
            diff[row, row] = -1.0 / step
            diff[row, row + 1] = 1.0 / step

        ineq_mat = np.concatenate((diff, -diff), axis=0)
        ineq_rhs = np.ones(2 * (n - 1))

        var = cp.Variable(n)
        cost = cp.Minimize((1 / 2) * cp.quad_form(var, quad) + lin.T @ var)
        problem = cp.Problem(cost, [ineq_mat @ var <= ineq_rhs])

        self.qp = CvxpyLayer(problem, parameters=[lin], variables=[var])
def __init__(self, n_assets, max_weight=1):
    """Construct.

    Builds a ``CvxpyLayer`` (stored as ``self.cvxpylayer``) with parameters
    ``(rets, covmat_sqrt, alpha)`` and variable ``w`` that maximises
    ``rets @ w - ||covmat_sqrt @ w||^2 - alpha * ||w||^2`` subject to
    ``sum(w) == 1`` and ``0 <= w <= max_weight``.

    Args:
        n_assets: length of the weight vector.
        max_weight: upper bound applied to every weight.
    """
    super().__init__()

    sqrt_cov = cp.Parameter((n_assets, n_assets))
    expected_rets = cp.Parameter(n_assets)
    reg_strength = cp.Parameter(nonneg=True)
    weights = cp.Variable(n_assets)

    gain = expected_rets @ weights
    variance = cp.sum_squares(sqrt_cov @ weights)
    shrinkage = reg_strength * (cp.norm(weights)**2)

    feasible_set = [cp.sum(weights) == 1, weights >= 0, weights <= max_weight]
    problem = cp.Problem(cp.Maximize(gain - variance - shrinkage), feasible_set)
    assert problem.is_dpp()

    self.cvxpylayer = CvxpyLayer(
        problem,
        parameters=[expected_rets, sqrt_cov, reg_strength],
        variables=[weights],
    )
def set_cvx_layer(self, batch_size, device):
    """Build the correction layer and the per-batch angle limits.

    Stores in ``self.cvxpylayer`` a ``CvxpyLayer`` with parameters
    ``(theta_max, theta_min, theta)`` and variable ``x``, all of shape
    ``(batch_size, 7)``, that minimises ``pnorm(x)`` subject to
    ``theta - x <= theta_max`` and ``theta + x >= theta_min``.

    Also stores ``self.theta_max_torch`` (seven limits given in degrees,
    converted with ``util.deg2rad`` and repeated over the batch) and
    ``self.theta_min_torch`` (zeros), both moved to ``device``.
    """
    shape = (batch_size, 7)

    correction = cp.Variable(shape)
    upper = cp.Parameter(shape)
    lower = cp.Parameter(shape)
    angles = cp.Parameter(shape)

    limits = [angles - correction <= upper, angles + correction >= lower]
    problem = cp.Problem(cp.Minimize(cp.pnorm(correction)), limits)
    assert problem.is_dpp()

    self.cvxpylayer = CvxpyLayer(
        problem, parameters=[upper, lower, angles], variables=[correction])

    eps = 1e-10
    limits_deg = torch.tensor(
        [180., 140., 140., 140., 48., 48., 48.], requires_grad=True)
    # eps is subtracted before and added back after the batch expansion,
    # exactly as in the original sequence of operations.
    limits_rad = util.deg2rad(limits_deg).to(device) - eps
    self.theta_max_torch = limits_rad.unsqueeze(0).repeat(batch_size, 1) + eps
    self.theta_min_torch = torch.zeros(shape, requires_grad=True).to(device)
def enet_cvxpy(X, y, lambda_alpha, idx_train, idx_val):
    """Validation loss of an elastic net and its hypergradient.

    Fits the elastic net on the training rows through a ``CvxpyLayer``
    whose parameters are ``(lambda, alpha) = (lambda_alpha[0],
    lambda_alpha[1])``, then evaluates the mean squared error on the
    validation rows.

    Args:
        X: design matrix (numpy array).
        y: targets (numpy array).
        lambda_alpha: the l1 and squared-l2 penalty strengths.
        idx_train: row indices used to fit.
        idx_val: row indices used to evaluate.

    Returns:
        ``(val, grad)``: the validation loss as a numpy value and its
        gradient with respect to ``lambda_alpha`` as a numpy array.
    """
    Xtrain = torch.from_numpy(X[idx_train, :])
    Xval = torch.from_numpy(X[idx_val])
    ytrain = torch.from_numpy(y[idx_train])
    yval = torch.from_numpy(y[idx_val])

    n_samples_train, n_features = Xtrain.shape

    # Decision variable and penalty strengths of the inner problem.
    coef = cp.Variable(n_features)
    l1_par = cp.Parameter(nonneg=True)
    l2_par = cp.Parameter(nonneg=True)

    data_fit = ((1 / (2 * n_samples_train)) *
                cp.sum(cp.square(Xtrain @ coef - ytrain)))
    penalty = (l1_par * cp.norm1(coef) +
               l2_par * cp.sum_squares(coef) / 2)

    problem = cp.Problem(cp.Minimize(data_fit + penalty))
    assert problem.is_dpp()

    layer = CvxpyLayer(problem, [l1_par, l2_par], [coef])
    lambda_alpha_th = torch.tensor(lambda_alpha, requires_grad=True)
    solver_args = {'eps': 1e-6, 'max_iters': 2000}
    coef_th, = layer(lambda_alpha_th[0], lambda_alpha_th[1],
                     solver_args=solver_args)

    # Validation loss and its gradient w.r.t. the hyperparameters.
    val_loss = (Xval @ coef_th - yval).pow(2).mean()
    val_loss.backward()

    return val_loss.detach().numpy(), np.array(lambda_alpha_th.grad)
def forward(self, A_vec):
    """Solve the stored problem for ``A_vec`` and return 3x3 matrices.

    Args:
        A_vec: vectorised cost matrix; a 1-D input is treated as a batch
            of one. Rows of length 16 are expanded with ``A_from_16_vec``,
            anything else with ``convert_Avec_to_A``.

    Returns:
        The first nine entries of each recovered vector, viewed as 3x3
        and transposed, with singleton dimensions squeezed away.
    """
    batch = A_vec.unsqueeze(dim=0) if A_vec.dim() < 2 else A_vec

    expand = A_from_16_vec if batch.shape[1] == 16 else convert_Avec_to_A
    A = expand(batch)

    # The layer is created for this call only and dropped right after use.
    layer = CvxpyLayer(self.prob, parameters=[self.A], variables=[self.X])
    X, = layer(A)
    del layer

    x = x_from_xxT(X)
    if x.dim() < 2:
        x = x.unsqueeze(dim=0)

    return x[:, :9].view(-1, 3, 3).transpose(1, 2).squeeze()
def test_broadcasting(self):
    """Check gradients when only ``b`` carries a batch dimension.

    Solves a ridge-regularised least-squares problem through a
    ``CvxpyLayer`` with an unbatched ``A`` and a batched ``b`` (the same
    row repeated ``n_batch`` times), and compares the gradients with those
    of the closed-form normal-equations solution.
    """
    set_seed(243)
    n_batch, m, n = 2, 100, 20

    A = cp.Parameter((m, n))
    b = cp.Parameter(m)
    x = cp.Variable(n)
    obj = cp.sum_squares(A @ x - b) + cp.sum_squares(x)
    prob = cp.Problem(cp.Minimize(obj))
    prob_th = CvxpyLayer(prob, [A, b], [x])

    A_th = torch.randn(m, n).double().requires_grad_()
    b_th = torch.randn(m).double().unsqueeze(0).repeat(n_batch, 1) \
        .requires_grad_()
    b_th_0 = b_th[0]

    x = prob_th(A_th, b_th, solver_args={"eps": 1e-10})[0]

    def lstsq(A, b):
        # torch.solve(B, A) no longer exists in current PyTorch; the
        # replacement torch.linalg.solve(A, B) takes the system matrix
        # first and returns only the solution.
        return torch.linalg.solve(
            A.t() @ A + torch.eye(n).double(),
            (A.t() @ b).unsqueeze(1))

    x_lstsq = lstsq(A_th, b_th_0)

    grad_A_cvxpy, grad_b_cvxpy = grad(x.sum(), [A_th, b_th])
    grad_A_lstsq, grad_b_lstsq = grad(x_lstsq.sum(), [A_th, b_th_0])

    # A is shared by every batch element, so its gradient accumulates
    # n_batch identical contributions.
    self.assertAlmostEqual(
        torch.norm(grad_A_cvxpy / n_batch - grad_A_lstsq).item(), 0.0)
    self.assertAlmostEqual(
        torch.norm(grad_b_cvxpy[0] - grad_b_lstsq).item(), 0.0)
def __init__(self, temperature=1, formulation="analytical", n_assets=None, max_weight=1):
    """Validate the settings and create the allocation layer.

    Args:
        temperature: stored on the instance as ``self.temperature``.
        formulation: ``"analytical"`` (a ``torch.nn.Softmax`` over dim 1)
            or ``"variational"`` (a ``CvxpyLayer`` minimising
            ``-x @ w - sum(entr(w))`` with ``sum(w) == 1`` and
            ``w <= max_weight``).
        n_assets: length of the weight vector; required for the
            variational formulation.
        max_weight: upper bound on each weight; only supported by the
            variational formulation.

    Raises:
        ValueError: on an unknown formulation, a missing ``n_assets`` for
            the variational formulation, a ``max_weight`` other than 1 for
            the analytical one, or a cap too small to sum to one.
    """
    super().__init__()

    self.temperature = temperature

    if formulation not in {"analytical", "variational"}:
        raise ValueError("Unrecognized formulation {}".format(formulation))

    if formulation == "analytical":
        if max_weight != 1:
            raise ValueError(
                "Cannot constraint weights via max_weight for analytical formulation"
            )
    else:
        if n_assets is None:
            raise ValueError(
                "One needs to provide n_assets for the variational formulation."
            )
        if n_assets * max_weight < 1:
            raise ValueError(
                "One cannot create fully invested portfolio with the given max_weight"
            )

    self.formulation = formulation

    if formulation == "analytical":
        self.layer = torch.nn.Softmax(dim=1)
        return

    scores = cp.Parameter(n_assets)
    weights = cp.Variable(n_assets)
    neg_entropy_cost = -scores @ weights - cp.sum(cp.entr(weights))
    feasible_set = [cp.sum(weights) == 1.0, weights <= max_weight]
    problem = cp.Problem(cp.Minimize(neg_entropy_cost), feasible_set)
    self.layer = CvxpyLayer(problem, [scores], [weights])
def test_least_squares(self):
    """Check layer gradients against the closed-form ridge solution.

    Solves ``min ||A x - b||^2 + ||x||^2`` through a ``CvxpyLayer`` and
    through the normal equations, then compares the gradients of the
    summed solution with respect to ``A`` and ``b``.
    """
    set_seed(243)
    m, n = 100, 20

    A = cp.Parameter((m, n))
    b = cp.Parameter(m)
    x = cp.Variable(n)
    obj = cp.sum_squares(A @ x - b) + cp.sum_squares(x)
    prob = cp.Problem(cp.Minimize(obj))
    prob_th = CvxpyLayer(prob, [A, b], [x])

    A_th = torch.randn(m, n).double().requires_grad_()
    b_th = torch.randn(m).double().requires_grad_()

    x = prob_th(A_th, b_th, solver_args={"eps": 1e-10})[0]

    def lstsq(A, b):
        # Uses its own arguments (the previous version silently closed
        # over A_th / b_th). torch.solve(B, A) no longer exists in current
        # PyTorch; torch.linalg.solve(A, B) takes the system matrix first.
        return torch.linalg.solve(
            A.t() @ A + torch.eye(n).double(),
            (A.t() @ b).unsqueeze(1))

    x_lstsq = lstsq(A_th, b_th)

    grad_A_cvxpy, grad_b_cvxpy = grad(x.sum(), [A_th, b_th])
    grad_A_lstsq, grad_b_lstsq = grad(x_lstsq.sum(), [A_th, b_th])

    self.assertAlmostEqual(
        torch.norm(grad_A_cvxpy - grad_A_lstsq).item(), 0.0)
    self.assertAlmostEqual(
        torch.norm(grad_b_cvxpy - grad_b_lstsq).item(), 0.0)
def __init__(self, board_size, g_dim, a_dim, q_penalty=1e-3):
    """Create the learnable QP data and the differentiable QP layer.

    Args:
        board_size: the flattened decision vector has ``board_size**3``
            entries.
        g_dim: number of inequality rows.
        a_dim: number of equality rows.
        q_penalty: scale of the identity used to initialise ``Q_sqrt``.

    The layer stored in ``self.layer`` takes
    ``(Q_sqrt, q, A, b, G, h)`` and returns ``z`` minimising
    ``0.5 * ||Q_sqrt z||^2 + q^T z`` subject to ``A z == b`` and
    ``G z <= h``.
    """
    super(OptNetLayer, self).__init__()

    n_flat = board_size**3
    dbl = torch.double

    # Learnable QP data, all double precision: a scaled identity for
    # Q_sqrt, z >= 0 encoded as -I z <= 0, a uniform-random A, and b = 1.
    # (An all-random-normal initialisation was previously kept here as a
    # commented-out alternative.)
    self.Q_sqrt = nn.Parameter(q_penalty * torch.eye(n_flat, dtype=dbl))
    self.G = nn.Parameter(-torch.eye(g_dim, n_flat, dtype=dbl))
    self.h = nn.Parameter(torch.zeros(g_dim, dtype=dbl))
    self.A = nn.Parameter(torch.rand((a_dim, n_flat), dtype=dbl))
    self.b = nn.Parameter(torch.ones(a_dim, dtype=dbl))

    # Symbolic counterpart of the QP.
    z_var = cp.Variable(n_flat)
    Q_sqrt_par = cp.Parameter((n_flat, n_flat))
    G_par = cp.Parameter((g_dim, n_flat))
    h_par = cp.Parameter(g_dim)
    A_par = cp.Parameter((a_dim, n_flat))
    b_par = cp.Parameter(a_dim)
    q_par = cp.Parameter(n_flat)

    cost = cp.Minimize(0.5 * cp.sum_squares(Q_sqrt_par @ z_var) + q_par.T @ z_var)
    feasible_set = [A_par @ z_var == b_par, G_par @ z_var <= h_par]
    problem = cp.Problem(cost, feasible_set)

    self.layer = CvxpyLayer(
        problem,
        parameters=[Q_sqrt_par, q_par, A_par, b_par, G_par, h_par],
        variables=[z_var],
    )
def test_portfolio(model, covariance_model, epoch, dataset, device='cpu', evaluate=False):
    """Evaluate the return and covariance models on a dataset.

    For every item the MSE between predicted and true returns is recorded.
    When ``evaluate`` is true, the portfolio QP is also solved with the
    predicted quantities (using the module-level ``solver``, ``alpha`` and
    ``REG``) and the realised objective
    ``labels @ x - 0.5 * alpha * x^T Q_real x`` is recorded; otherwise the
    objective is recorded as 0.

    Args:
        model: return predictor; called on the float features.
        covariance_model: called without arguments to get the predicted
            covariance.
        epoch: ``-1`` evaluates with the true labels and covariance in
            place of the predictions.
        dataset: iterable of ``(features, covariance_mat, labels)``; only
            the first element of each batch is used.
        device: device on which the per-item tensors are placed.
        evaluate: whether to solve the QP.

    Returns:
        ``(average_loss, average_obj)`` over the dataset.

    Raises:
        ValueError: if ``evaluate`` is true and ``solver`` is neither
            ``'qpth'`` nor ``'cvxpy'``.
    """
    model.eval()
    covariance_model.eval()
    loss_fn = torch.nn.MSELoss()
    test_losses, test_objs = [], []

    forward_time, inference_time = 0, 0

    with tqdm.tqdm(dataset) as tqdm_loader:
        for features, covariance_mat, labels in tqdm_loader:
            forward_start_time = time.time()
            # Only a single datum per batch is used.
            features = features[0].to(device)
            covariance_mat = covariance_mat[0].to(device)
            labels = labels[0, :, 0].to(device).float()
            n = len(covariance_mat)

            # Built on `device` so it can be combined with the data tensors
            # when device is not the CPU.
            eye_n = torch.eye(n, device=device)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + eye_n * REG

            if epoch == -1:
                predictions = labels
                Q = Q_real
            else:
                predictions = model(features.float())[:, 0]
                Q = covariance_model() * (1 - REG) + eye_n * REG

            loss = loss_fn(predictions, labels)

            if evaluate:
                forward_time += time.time() - forward_start_time
                inference_start_time = time.time()
                p = predictions
                L = sqrtm(Q)
                if solver == 'qpth':
                    G = -eye_n
                    h = torch.zeros(n, device=device)
                    A = torch.ones(1, n, device=device)
                    b = torch.ones(1, device=device)
                    qp_solver = qpth.qp.QPFunction()
                    x = qp_solver(alpha * Q, -p, G, h, A, b)[0]
                elif solver == 'cvxpy':
                    x_var = cp.Variable(n)
                    L_para = cp.Parameter((n, n))
                    p_para = cp.Parameter(n)
                    constraints = [x_var >= 0, x_var <= 1, cp.sum(x_var) == 1]
                    objective = cp.Minimize(
                        0.5 * alpha * cp.sum_squares(L_para @ x_var) + p_para.T @ x_var)
                    problem = cp.Problem(objective, constraints)
                    cvxpylayer = CvxpyLayer(
                        problem, parameters=[L_para, p_para], variables=[x_var])
                    x, = cvxpylayer(L, -p)
                else:
                    # Previously this fell through to an UnboundLocalError
                    # on `x` below.
                    raise ValueError('Unknown solver: {}'.format(solver))

                obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x
                inference_time += time.time() - inference_start_time
            else:
                obj = torch.Tensor([0])

            test_losses.append(loss.item())
            test_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss = np.mean(test_losses)
    average_obj = np.mean(test_objs)
    return average_loss, average_obj
def getDefUtility(single_data, T, s, unbiased_probs_pred, path_model, cut_size, omega=4, verbose=False, initial_coverage_prob=None, training_mode=True, training_method='surrogate-decision-focused', block_selection='coverage'):
    """Defender utility of the coverage that is optimal for the predictions.

    The coverage is first optimised with SLSQP on the surrogate problem
    (in the reduced variables mapped through ``T`` and ``s``). In training
    mode, and if that optimisation succeeded, a local QP approximation
    around the optimum is re-solved through a ``CvxpyLayer`` so the result
    is differentiable. The utility is always evaluated with the true
    transition probabilities.

    Args:
        single_data: 9-tuple whose first entry is the graph ``G`` and whose
            eighth entry is ``unbiased_probs_true``.
        T, s: passed through to the surrogate optimiser, Hessian, Jacobian
            and objective helpers; ``T.shape[1]`` is the number of decision
            variables.
        unbiased_probs_pred: predicted transition probabilities.
        path_model: unused.
        cut_size: number of rows of ``T``, as used to size the box
            constraints.
        omega: forwarded to the objective helpers.
        verbose: forwarded to the optimiser as ``disp``.
        initial_coverage_prob: ignored; a uniform start scaled to the
            budget is always used.
        training_mode: whether to run the differentiable QP step.
        training_method, block_selection: unused.

    Returns:
        ``(pred_defender_utility, full_coverage_qp_solution,
        (forward_time, qp_time))``.
    """
    G, Fv, coverage_prob, phi_true, path_list, min_cut, log_prob, unbiased_probs_true, previous_gradient = single_data

    m, variable_size = G.number_of_edges(), T.shape[1]
    budget = G.graph['budget']
    U = torch.Tensor(G.graph['U'])
    initial_distribution = torch.Tensor(G.graph['initial_distribution'])
    options = {"maxiter": 100, "disp": verbose}
    tol = None
    method = "SLSQP"

    def surrogate_objective(coverage, probs):
        # Surrogate objective at `coverage` under transition probs `probs`.
        return surrogate_objective_function_matrix_form(
            coverage, T, s, G, probs, torch.Tensor(U),
            torch.Tensor(initial_distribution), omega=omega)

    # Full forward path: the decision variables are the entire set of
    # variables. The starting point is very influential; a uniform start
    # scaled to the budget is used.
    initial_coverage_prob = np.ones(variable_size)
    initial_coverage_prob = initial_coverage_prob / np.sum(initial_coverage_prob) * budget

    forward_start_time = time.time()
    pred_optimal_res = surrogate_get_optimal_coverage_prob(
        T.detach(), s, G, unbiased_probs_pred.detach(), U, initial_distribution,
        budget, omega=omega, options=options, method=method,
        initial_coverage_prob=initial_coverage_prob, tol=tol)  # scipy version
    pred_optimal_coverage = torch.Tensor(pred_optimal_res['x'])
    if not pred_optimal_res['success']:
        print('optimization fails...')
        print(pred_optimal_res)
    forward_time = time.time() - forward_start_time

    # ========================== QP part ===========================
    qp_start_time = time.time()

    scale_constant = 1
    A_original, b_original = torch.ones(1, cut_size) / scale_constant, torch.Tensor([budget])
    A_matrix, b_matrix = A_original @ T, b_original
    G_original = torch.cat((-torch.eye(cut_size), torch.eye(cut_size)))
    h_original = torch.cat((torch.zeros(cut_size), torch.ones(cut_size)))
    # Box constraints on the mapped coverage plus non-negativity of the
    # decision variables themselves.
    G_matrix = torch.cat((G_original @ T, -torch.eye(variable_size)))
    h_matrix = torch.cat((h_original, torch.zeros(variable_size)))

    if training_mode and pred_optimal_res['success']:
        edge_set = list(range(m))

        Q = numerical_surrogate_obj_hessian_matrix_form(
            pred_optimal_coverage, T.detach(), s.detach(), G, unbiased_probs_pred,
            U, initial_distribution, omega=omega, edge_set=edge_set)
        jac = torch_surrogate_dobj_dx_matrix_form(
            pred_optimal_coverage, T, s, G, unbiased_probs_pred, U,
            initial_distribution, omega=omega, lib=torch, edge_set=edge_set)
        Q_sym = (Q + Q.t()) / 2

        # ------------------ regularization -----------------------
        # Diagonal regularization: clamp the diagonal from below and double
        # the floor until the Cholesky factorisation succeeds.
        Q_regularized = Q_sym.clone()
        reg_const = 0.1
        while True:
            Q_regularized[range(variable_size), range(variable_size)] = torch.clamp(
                torch.diag(Q_sym), min=reg_const)
            try:
                L = torch.cholesky(Q_regularized)
                break
            except RuntimeError:
                # Factorisation failures surface as RuntimeError; anything
                # else (e.g. KeyboardInterrupt) must not be retried.
                reg_const *= 2

        p = jac.view(1, -1) - Q_regularized @ pred_optimal_coverage

        try:
            x = cp.Variable(variable_size)
            A_default, b_default = cp.Parameter((1, variable_size)), cp.Parameter(1)
            G_default, h_default = cp.Parameter(
                (cut_size * 2 + variable_size, variable_size)), cp.Parameter(cut_size * 2 + variable_size)
            L_default = cp.Parameter((variable_size, variable_size))
            p_default = cp.Parameter(variable_size)
            constraints = [A_default @ x == b_default, G_default @ x <= h_default]
            objective = cp.Minimize(0.5 * cp.sum_squares(L_default @ x) + p_default.T @ x)
            problem = cp.Problem(objective, constraints)

            cvxpylayer = CvxpyLayer(
                problem,
                parameters=[A_default, b_default, G_default, h_default, L_default, p_default],
                variables=[x])
            coverage_qp_solution, = cvxpylayer(A_matrix, b_matrix, G_matrix, h_matrix, L, p)
            # p carries a leading batch dimension of one.
            full_coverage_qp_solution = coverage_qp_solution[0]
        except Exception:
            # Best effort: fall back to the SLSQP solution, but let
            # KeyboardInterrupt / SystemExit propagate.
            print("CVXPY solver fails... Usually because Q is not PSD")
            full_coverage_qp_solution = pred_optimal_coverage.clone()
        pred_defender_utility = -(surrogate_objective(full_coverage_qp_solution, unbiased_probs_true))
    else:
        full_coverage_qp_solution = pred_optimal_coverage.clone()
        pred_defender_utility = -(surrogate_objective(full_coverage_qp_solution, unbiased_probs_true))
    qp_time = time.time() - qp_start_time

    # ========================= Error message =========================
    if (torch.norm(T.detach() @ pred_optimal_coverage - T.detach() @ full_coverage_qp_solution) > 0.1):
        print('QP solution and scipy solution differ {} too much..., not backpropagating this instance'.format(
            torch.norm(pred_optimal_coverage - full_coverage_qp_solution)))
        print("objective value (SLSQP): {}".format(
            surrogate_objective(pred_optimal_coverage, unbiased_probs_pred)))
        print(pred_optimal_coverage)
        print("objective value (QP): {}".format(
            surrogate_objective(full_coverage_qp_solution, unbiased_probs_pred)))
        print(full_coverage_qp_solution)
        full_coverage_qp_solution = pred_optimal_coverage.clone()
        pred_defender_utility = -(surrogate_objective(full_coverage_qp_solution, unbiased_probs_true))

    return pred_defender_utility, full_coverage_qp_solution, (forward_time, qp_time)
import cvxpy as cp
import torch
from cvxpylayers.torch import CvxpyLayer

# Small l1 regression with a non-negativity constraint, parametrised by A, b.
n, m = 2, 3
x = cp.Variable(n)
A = cp.Parameter((m, n))
b = cp.Parameter(m)
constraints = [x >= 0]
objective = cp.Minimize(0.5 * cp.pnorm(A @ x - b, p=1))
problem = cp.Problem(objective, constraints)
assert problem.is_dpp()

cvxpylayer = CvxpyLayer(problem, parameters=[A, b], variables=[x])
A_tch = torch.randn(m, n, requires_grad=True)
b_tch = torch.randn(m, requires_grad=True)

# solve the problem
solution, = cvxpylayer(A_tch, b_tch)

# compute the gradient of the sum of the solution with respect to A, b
# Tensor.backward() returns None; the gradients are accumulated on the
# leaf tensors, so they are read from .grad afterwards.
solution.sum().backward()
grad = (A_tch.grad, b_tch.grad)
print(grad)
# Construct CVXPY problem and layer x_cvxpy = cp.Parameter((n, 1)) P_sqrt_cvxpy = cp.Parameter((m, m)) P_21_cvxpy = cp.Parameter((n, m)) q_cvxpy = cp.Parameter((m, 1)) u_cvxpy = cp.Variable((m, 1)) y_cvxpy = cp.Variable((n, 1)) objective = .5 * cp.sum_squares( P_sqrt_cvxpy @ u_cvxpy) + x_cvxpy.T @ y_cvxpy + q_cvxpy.T @ u_cvxpy problem = cp.Problem(cp.Minimize(objective), [cp.norm(u_cvxpy) <= 1, y_cvxpy == P_21_cvxpy @ u_cvxpy]) assert problem.is_dpp() policy = CvxpyLayer(problem, [x_cvxpy, P_sqrt_cvxpy, P_21_cvxpy, q_cvxpy], [u_cvxpy]) ''' ----------------------------------------------------------------------------------- ''' def train(iters): # Initialize with LQR control lyapunov function P_sqrt = torch.from_numpy(P_sqrt_lqr).requires_grad_(True) P_21 = torch.from_numpy(A.T @ P_lqr @ B).requires_grad_(True) q = torch.zeros((m, 1), requires_grad=True, dtype=torch.float64) variables = [P_sqrt, P_21, q] A_tch, B_tch, Q_tch, R_tch = map(torch.from_numpy, [A, B, Q, R]) def g(x, u): return (x.t() @ Q_tch @ x + u.t() @ R_tch @ u).squeeze()
def lasso_sure_cvxpy(X, y, alpha, sigma, random_state=42):
    """SURE-type criterion for the Lasso and its gradient w.r.t. ``alpha``.

    Two Lasso problems are solved through ``CvxpyLayer``: one on ``y`` and
    one on ``y + epsilon * delta`` with ``delta`` drawn from the seeded
    random state and ``epsilon = 2 * sigma / n_samples**0.3``. Their
    solutions are combined into the returned criterion.

    Args:
        X: design matrix (numpy array).
        y: targets (numpy array).
        alpha: l1 penalty strength.
        sigma: noise level used in the criterion.
        random_state: seed forwarded to ``check_random_state``.

    Returns:
        ``(val, grad)``: the criterion value and its gradient with respect
        to ``alpha``.
    """
    n_samples, n_features = X.shape
    epsilon = 2 * sigma / n_samples**0.3
    rng = check_random_state(random_state)
    delta = rng.randn(n_samples)
    y2 = y + epsilon * delta
    Xth, yth, y2th, deltath = map(torch.from_numpy, [X, y, y2, delta])

    def solve_lasso(target):
        # Lasso on (Xth, target) via a CvxpyLayer; returns the solution
        # together with the leaf tensor that holds alpha.
        coef = cp.Variable(n_features)
        penalty = cp.Parameter(nonneg=True)
        data_fit = ((1 / (2 * n_samples)) *
                    cp.sum(cp.square(Xth @ coef - target)))
        problem = cp.Problem(cp.Minimize(data_fit + penalty * cp.norm1(coef)))
        assert problem.is_dpp()
        layer = CvxpyLayer(problem, [penalty], [coef])
        alpha_leaf = torch.tensor(alpha, requires_grad=True)
        solution, = layer(alpha_leaf)
        return solution, alpha_leaf

    # First problem: fit on the observed targets.
    beta1, alpha_th1 = solve_lasso(yth)
    test_loss1 = (Xth @ beta1 - yth).pow(2).sum()
    test_loss1 -= 2 * sigma**2 / epsilon * (Xth @ beta1) @ deltath
    test_loss1.backward()
    val1 = test_loss1.detach().numpy()
    grad1 = np.array(alpha_th1.grad)

    # Second problem: fit on the perturbed targets.
    beta2, alpha_th2 = solve_lasso(y2th)
    test_loss2 = 2 * sigma**2 / epsilon * (Xth @ beta2) @ deltath
    test_loss2.backward()
    val2 = test_loss2.detach().numpy()
    grad2 = np.array(alpha_th2.grad)

    val = val1 + val2 - len(y) * sigma**2
    grad = grad1 + grad2
    return val, grad
def bregman_map_cvxtorch(s_mat, Wk_plus_value, Wk_minus_value, gamma, l1_pen, dagness_pen, dagness_exp):
    """
    Solves argmin g(W) + <grad f (Wk), W-Wk> + 1/gamma * Dh(W, Wk)
    with new CVXPY layers and PyTorch

    this is only implemented for a specific penalty and kernel

    Args:
        s_mat (np.array): data matrix
        Wk_plus_value (np.array): current iterate value for W+
        Wk_minus_value (np.array): current iterate value for W-
        gamma (float): Bregman iteration map param
        l1_pen (float): lambda in paper
        dagness_pen (float): mu in paper
        dagness_exp (float): alpha in paper

    Returns:
        A pair ``(W_plus, W_minus)`` of numpy arrays. The solver output is
        first reduced to its non-overlapping positive/negative parts; that
        pair is returned if its total mass still satisfies the lower bound
        ``n / ((n - 2) * dagness_exp)``, otherwise the solver output clipped
        at zero is returned.
    """
    n = s_mat.shape[1]

    W_plus = cp.Variable((n, n), nonneg=True)
    W_plus.value = Wk_plus_value
    W_minus = cp.Variable((n, n), nonneg=True)
    inv_gamma_param = cp.Parameter(nonneg=True)
    l1_pen_param = cp.Parameter(nonneg=True)
    W_minus.value = Wk_minus_value

    sum_W = W_plus + W_minus  # sum variable

    obj_ll = cp.norm(s_mat @ (np.eye(n) - W_plus + W_minus), "fro") ** 2
    obj_spars = l1_pen_param * cp.sum(W_plus + W_minus)

    # Compute C
    sum_Wk = Wk_plus_value + Wk_minus_value
    C = compute_C(n, sum_Wk, dagness_pen, dagness_exp, inv_gamma_param)
    obj_trace = cp.trace(C @ sum_W)

    kernel = dagness_pen * (n - 1) * (1 + dagness_exp * cp.norm(sum_W, "fro"))**n
    obj_kernel = inv_gamma_param * kernel

    obj = obj_ll + obj_spars + obj_trace + obj_kernel
    prob = cp.Problem(cp.Minimize(obj),
                      [cp.sum(W_plus) + cp.sum(W_minus) >= n / ((n - 2) * dagness_exp)])

    # The message used to have four placeholders for a single argument, so
    # a failing check raised IndexError instead of AssertionError.
    assert prob.is_dpp(), "problem is not DPP (kernel term is DPP: {})".format(kernel.is_dpp())

    layer = CvxpyLayer(prob, parameters=[inv_gamma_param, l1_pen_param],
                       variables=[W_plus, W_minus])
    # TODO allow GPU
    torch_gamma = torch.tensor(1 / gamma)
    torch_l1_pen = torch.tensor(l1_pen)
    x_star = layer(torch_gamma, torch_l1_pen)

    next_W_plus, next_W_minus = x_star[0].numpy(), x_star[1].numpy()

    # Remove the overlap between the positive and negative parts.
    tilde_W_plus = np.maximum(next_W_plus - next_W_minus, 0.0)
    tilde_W_minus = np.maximum(next_W_minus - next_W_plus, 0.0)
    tilde_sum = tilde_W_plus + tilde_W_minus

    if np.sum(tilde_sum) >= n / ((n - 2) * dagness_exp):
        return tilde_W_plus, tilde_W_minus
    return np.maximum(next_W_plus, 0), np.maximum(next_W_minus, 0)
def getDefUtility(single_data, unbiased_probs_pred, path_model, cut_size, omega=4, verbose=False, initial_coverage_prob=None, training_mode=True, training_method='two-stage', block_selection='coverage'):
    """Defender utility of the coverage that is optimal for the predictions.

    The coverage is first optimised with SLSQP. In training mode, and if
    that optimisation succeeded, a local QP approximation restricted to a
    block of edges is re-solved through a ``CvxpyLayer`` so the result is
    differentiable with respect to the predictions. The utility is always
    evaluated with the true transition probabilities.

    Args:
        single_data: 9-tuple whose first entry is the graph ``G`` and whose
            eighth entry is ``unbiased_probs_true``.
        unbiased_probs_pred: predicted transition probabilities.
        path_model: unused.
        cut_size: number of edges in the QP block.
        omega: forwarded to the objective helpers.
        verbose: forwarded to the optimiser as ``disp``.
        initial_coverage_prob: ignored; a uniform start scaled to the
            budget is always used.
        training_mode: whether to run the differentiable QP step.
        training_method: ``'block-decision-focused'``, ``'hybrid'`` and
            ``'corrected-block-decision-focused'`` sample a block of
            ``cut_size`` edges; anything else uses every edge.
        block_selection: sampling weights for the block: ``'derivative'``,
            ``'coverage'``, ``'uniform'`` or ``'slack'``.

    Returns:
        ``(pred_defender_utility, full_coverage_qp_solution,
        (forward_time, qp_time))``.

    Raises:
        ValueError: on an unknown ``block_selection``.
    """
    G, Fv, coverage_prob, phi_true, path_list, min_cut, log_prob, unbiased_probs_true, previous_gradient = single_data

    m = G.number_of_edges()
    budget = G.graph['budget']
    U = torch.Tensor(G.graph['U'])
    initial_distribution = torch.Tensor(G.graph['initial_distribution'])
    options = {"maxiter": 100, "disp": verbose}
    tol = None
    method = "SLSQP"

    # Full forward path: the decision variables are the entire set of
    # variables. The starting point is very influential; a uniform start
    # scaled to the budget is used.
    initial_coverage_prob = np.ones(m)
    initial_coverage_prob = initial_coverage_prob / np.sum(initial_coverage_prob) * budget

    forward_start_time = time.time()
    pred_optimal_res = get_optimal_coverage_prob(
        G, unbiased_probs_pred.detach(), U, initial_distribution, budget,
        omega=omega, options=options, method=method,
        initial_coverage_prob=initial_coverage_prob, tol=tol)  # scipy version
    pred_optimal_coverage = torch.Tensor(pred_optimal_res['x'])
    if not pred_optimal_res['success']:
        print(pred_optimal_res)
        print('optimization fails...')
    forward_time = time.time() - forward_start_time

    # ======================== edge set choice =====================
    qp_start_time = time.time()
    first_order_derivative = dobj_dx_matrix_form(
        pred_optimal_coverage, G, unbiased_probs_pred, U, initial_distribution,
        np.arange(m), omega=omega, lib=torch)
    if block_selection == 'derivative':
        sample_distribution = np.abs(first_order_derivative.detach().numpy()) + 1e-3
    elif block_selection == 'coverage':
        sample_distribution = pred_optimal_coverage.detach().numpy() + 1e-3
    elif block_selection == 'uniform':
        sample_distribution = np.ones(m)
    elif block_selection == 'slack':
        sample_distribution = np.exp(-np.abs(pred_optimal_coverage.detach().numpy() - 0.5) * 5)
    else:
        raise ValueError('Not Implemented Block Selection')
    sample_distribution /= sum(sample_distribution)

    if training_method == 'block-decision-focused' or training_method == 'hybrid':
        edge_set = np.array(sorted(np.random.choice(
            range(m), size=cut_size, replace=False, p=sample_distribution)))
    elif training_method == 'corrected-block-decision-focused':
        edge_set = np.array(sorted(np.random.choice(
            range(m), size=cut_size, replace=False, p=sample_distribution)))
        # The split of the block into two halves is not used below; the
        # shuffle is kept so the global NumPy random stream advances
        # exactly as before.
        indices = np.arange(cut_size)
        np.random.shuffle(indices)
    else:
        edge_set = list(range(m))

    # ========================== QP part ===========================
    scale_constant = 1
    # The block keeps the total coverage it received from SLSQP.
    A_matrix, b_matrix = torch.ones(1, cut_size) / scale_constant, torch.Tensor([sum(pred_optimal_coverage[edge_set])]) / scale_constant
    G_matrix = torch.cat((-torch.eye(cut_size), torch.eye(cut_size)))
    h_matrix = torch.cat((torch.zeros(cut_size), torch.ones(cut_size)))

    if training_mode and pred_optimal_res['success']:
        Q = obj_hessian_matrix_form(
            pred_optimal_coverage, G, unbiased_probs_pred, U, initial_distribution,
            edge_set, omega=omega)
        jac = dobj_dx_matrix_form(
            pred_optimal_coverage, G, unbiased_probs_pred, U, initial_distribution,
            edge_set, omega=omega, lib=torch)
        Q_sym = (Q + Q.t()) / 2

        # ------------------ regularization -----------------------
        # Diagonal regularization: clamp the diagonal from below and double
        # the floor until the Cholesky factorisation succeeds.
        Q_regularized = Q_sym.clone()
        reg_const = 0.1
        while True:
            Q_regularized[range(cut_size), range(cut_size)] = torch.clamp(
                torch.diag(Q_sym), min=reg_const)
            try:
                L = torch.cholesky(Q_regularized)
                break
            except RuntimeError:
                # Factorisation failures surface as RuntimeError; anything
                # else (e.g. KeyboardInterrupt) must not be retried.
                reg_const *= 2

        p = jac.view(1, -1) - Q_regularized @ pred_optimal_coverage[edge_set]

        x = cp.Variable(cut_size)
        A_default, b_default = cp.Parameter((1, cut_size)), cp.Parameter(1)
        G_default, h_default = cp.Parameter((cut_size * 2, cut_size)), cp.Parameter(cut_size * 2)
        L_default = cp.Parameter((cut_size, cut_size))
        p_default = cp.Parameter(cut_size)
        constraints = [A_default @ x == b_default, G_default @ x <= h_default]
        objective = cp.Minimize(0.5 * cp.sum_squares(L_default @ x) + p_default.T @ x)
        problem = cp.Problem(objective, constraints)

        cvxpylayer = CvxpyLayer(
            problem,
            parameters=[A_default, b_default, G_default, h_default, L_default, p_default],
            variables=[x])
        coverage_qp_solution, = cvxpylayer(A_matrix, b_matrix, G_matrix, h_matrix, L, p)
        # Only the block is replaced; p carries a leading batch dimension
        # of one.
        full_coverage_qp_solution = pred_optimal_coverage.clone()
        full_coverage_qp_solution[edge_set] = coverage_qp_solution[0]

        pred_defender_utility = -(objective_function_matrix_form(
            full_coverage_qp_solution, G, unbiased_probs_true, torch.Tensor(U),
            torch.Tensor(initial_distribution), edge_set, omega=omega))
    else:
        full_coverage_qp_solution = pred_optimal_coverage.clone()
        pred_defender_utility = -(objective_function_matrix_form(
            full_coverage_qp_solution, G, unbiased_probs_true, torch.Tensor(U),
            torch.Tensor(initial_distribution), edge_set, omega=omega))
    qp_time = time.time() - qp_start_time

    # ========================= Error message =========================
    if (torch.norm(pred_optimal_coverage - full_coverage_qp_solution) > 0.1):
        print('QP solution and scipy solution differ {} too much..., not backpropagating this instance'.format(
            torch.norm(pred_optimal_coverage - full_coverage_qp_solution)))
        print("objective value (SLSQP): {}".format(objective_function_matrix_form(
            pred_optimal_coverage, G, unbiased_probs_pred, torch.Tensor(U),
            torch.Tensor(initial_distribution), edge_set, omega=omega)))
        print(pred_optimal_coverage)
        print("objective value (QP): {}".format(objective_function_matrix_form(
            full_coverage_qp_solution, G, unbiased_probs_pred, torch.Tensor(U),
            torch.Tensor(initial_distribution), edge_set, omega=omega)))
        print(full_coverage_qp_solution)
        full_coverage_qp_solution = pred_optimal_coverage.clone()
        pred_defender_utility = -(objective_function_matrix_form(
            full_coverage_qp_solution, G, unbiased_probs_true, torch.Tensor(U),
            torch.Tensor(initial_distribution), edge_set, omega=omega))

    return pred_defender_utility, full_coverage_qp_solution, (forward_time, qp_time)
def train_portfolio(model, covariance_model, optimizer, epoch, dataset, training_method='two-stage', device='cpu', evaluate=False):
    """Train the return and covariance models for one pass over ``dataset``.

    For each sample the MSE between predicted and true returns is computed.
    When ``evaluate`` is true, the portfolio QP is also solved with the
    module-level ``solver`` ('qpth' or 'cvxpy') and the realised objective
    ``labels @ x - 0.5 * alpha * x' Q_real x`` is recorded; otherwise the
    objective is a constant zero tensor.

    Args:
        model: Module mapping features to predictions; column 0 of its
            output is used.
        covariance_model: Module called without arguments to produce the
            covariance estimate.
        optimizer: Optimizer stepped once per sample.
        epoch: Not used by this function.
        dataset: Iterable of ``(features, covariance_mat, labels)``; only
            index 0 of each element is used (one sample per batch).
        training_method: ``'two-stage'`` (prediction loss plus covariance
            norm loss) or ``'decision-focused'`` (negative objective).
        device: Device the sample tensors are moved to.
        evaluate: Whether to solve the QP and compute the objective.

    Returns:
        ``(average_loss, average_obj, (forward_time, inference_time,
        qp_time, backward_time))``. ``qp_time`` is never updated here and is
        always 0.

    Raises:
        ValueError: If ``training_method`` or the module-level ``solver`` is
            not one of the supported values.
    """
    # Fail fast: raised inside the guarded back-prop section this error would
    # be swallowed and training would silently do nothing.
    if training_method not in ('two-stage', 'decision-focused'):
        raise ValueError('Not implemented method')

    model.train()
    covariance_model.train()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs = [], []
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0
    # Differentiable QP layers keyed by problem size, so the cvxpy problem is
    # canonicalised once instead of once per sample.
    cvxpy_layers = {}

    with tqdm.tqdm(dataset) as tqdm_loader:
        for features, covariance_mat, labels in tqdm_loader:
            forward_start_time = time.time()
            # Each batch holds a single sample.
            features = features[0].to(device)
            covariance_mat = covariance_mat[0].to(device)
            labels = labels[0, :, 0].to(device).float()
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:, 0]
            loss = loss_fn(predictions, labels)
            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG  # TODO

            if evaluate:
                forward_time += time.time() - forward_start_time
                inference_start_time = time.time()

                p = predictions
                if solver == 'qpth':
                    G = -torch.eye(n)
                    h = torch.zeros(n)
                    A = torch.ones(1, n)
                    b = torch.ones(1)
                    qp_solver = qpth.qp.QPFunction()
                    x = qp_solver(alpha * Q, -p, G, h, A, b)[0]
                elif solver == 'cvxpy':
                    cvxpylayer = cvxpy_layers.get(n)
                    if cvxpylayer is None:
                        x_var = cp.Variable(n)
                        L_para = cp.Parameter((n, n))
                        p_para = cp.Parameter(n)
                        constraints = [x_var >= 0, x_var <= 1, cp.sum(x_var) == 1]
                        objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ x_var) + p_para.T @ x_var)
                        problem = cp.Problem(objective, constraints)
                        cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para], variables=[x_var])
                        cvxpy_layers[n] = cvxpylayer
                    # The layer takes a square root L of Q so the quadratic
                    # term can be written as sum_squares(L @ x).
                    L = sqrtm(Q)
                    x, = cvxpylayer(L, -p)
                else:
                    raise ValueError(f'Unknown solver: {solver!r}')

                obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x
                inference_time += time.time() - inference_start_time
            else:
                obj = torch.Tensor([0])

            # ====================== back-prop =====================
            optimizer.zero_grad()
            backward_start_time = time.time()
            try:
                if training_method == 'two-stage':
                    Q_loss = torch.norm(Q - Q_real)
                    (loss + Q_loss).backward()
                else:  # 'decision-focused'
                    (-obj).backward()
                    # Element-wise clipping to [-MAX_NORM, MAX_NORM];
                    # parameters without a gradient are skipped.
                    torch.nn.utils.clip_grad_value_(model.parameters(), MAX_NORM)
                    torch.nn.utils.clip_grad_value_(covariance_model.parameters(), MAX_NORM)
            except Exception:
                # Best effort: a failed backward pass skips this sample's
                # update instead of aborting training.
                print("no grad is backpropagated...")
            optimizer.step()
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss = np.mean(train_losses)
    average_obj = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time, backward_time)
def surrogate_train_portfolio(model, covariance_model, T_init, optimizer, T_optimizer, epoch, dataset, training_method='surrogate', device='cpu', evaluate=False):
    """Train the models and the reparameterisation matrix for one pass over ``dataset``.

    The portfolio QP is solved in the reduced variable ``y`` with ``x = T @ y``,
    using the module-level ``solver`` ('qpth' or 'cvxpy'). For each sample ``T``
    is a detached copy of ``T_init`` in which one randomly chosen column is
    taken from ``T_init`` itself, so only that column receives a gradient.

    Args:
        model: Module mapping features to predictions; column 0 of its
            output is used.
        covariance_model: Module called without arguments to produce the
            covariance estimate.
        T_init: Reparameterisation matrix; ``T_init.shape[1]`` is the size of
            the reduced variable. Its gradient is clipped and its data is
            re-normalised with ``normalize_matrix_positive`` after each step.
        optimizer: Optimizer stepped once per sample.
        T_optimizer: Second optimizer, stepped right after ``optimizer``.
        epoch: Not used by this function.
        dataset: Iterable of ``(features, covariance_mat, labels)``; only
            index 0 of each element is used (one sample per batch).
        training_method: Must be ``'surrogate'``.
        device: Device the sample tensors are moved to.
        evaluate: Not used by this function.

    Returns:
        ``(average_loss, average_obj, (forward_time, inference_time,
        qp_time, backward_time))``. ``qp_time`` is never updated here and is
        always 0.

    Raises:
        ValueError: If ``training_method`` is not ``'surrogate'`` or the
            module-level ``solver`` is not supported.
    """
    # Fail fast: raised inside the guarded back-prop section this error would
    # be swallowed and training would silently do nothing.
    if training_method != 'surrogate':
        raise ValueError('Not implemented method')

    model.train()
    covariance_model.train()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs = [], []
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0
    T_size = T_init.shape[1]
    T_weight = 0.0  # weight of the off-diagonal covariance penalty on T
    # Differentiable QP layers keyed by the number of assets, so the cvxpy
    # problem is canonicalised once instead of once per sample.
    cvxpy_layers = {}

    with tqdm.tqdm(dataset) as tqdm_loader:
        for features, covariance_mat, labels in tqdm_loader:
            forward_start_time = time.time()
            # Each batch holds a single sample.
            features = features[0].to(device)
            covariance_mat = covariance_mat[0].to(device)
            labels = labels[0, :, 0].to(device).float()
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:, 0]
            loss = loss_fn(predictions, labels)

            # Randomly select the single column of T that is updated.
            T = T_init.detach().clone()
            random_column = torch.randint(T_init.shape[1], [1])
            T[:, random_column] = T_init[:, random_column]
            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG

            forward_time += time.time() - forward_start_time
            inference_start_time = time.time()

            p = predictions @ T
            if solver == 'qpth':
                G = -torch.eye(n) @ T
                h = torch.zeros(n)
                A = torch.ones(1, n) @ T
                b = torch.ones(1)
                qp_solver = qpth.qp.QPFunction()
                y = qp_solver(alpha * T.t() @ Q @ T, -p, G, h, A, b)[0]
            elif solver == 'cvxpy':
                cvxpylayer = cvxpy_layers.get(n)
                if cvxpylayer is None:
                    y_var = cp.Variable(T_size)
                    L_para = cp.Parameter((T_size, T_size))
                    p_para = cp.Parameter(T_size)
                    T_para = cp.Parameter((n, T_size))
                    constraints = [T_para @ y_var >= 0, cp.sum(T_para @ y_var) == 1]
                    objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ y_var) + p_para.T @ y_var)
                    problem = cp.Problem(objective, constraints)
                    cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para, T_para], variables=[y_var])
                    cvxpy_layers[n] = cvxpylayer
                # Square root of the reduced covariance, so the quadratic
                # term can be written as sum_squares(L @ y).
                L = sqrtm(T.t() @ Q @ T)
                y, = cvxpylayer(L, -p, T)
            else:
                raise ValueError(f'Unknown solver: {solver!r}')
            x = T @ y

            obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x
            inference_time += time.time() - inference_start_time

            # ====================== back-prop =====================
            optimizer.zero_grad()
            T_optimizer.zero_grad()
            backward_start_time = time.time()
            # Computed outside the guarded section so the progress bar below
            # always has a current value to display.
            covariance = computeCovariance(T.t())
            T_loss = torch.sum(covariance) - torch.sum(torch.diag(covariance))
            try:
                (-obj + T_weight * T_loss).backward()
                # Element-wise clipping; parameters without a gradient are
                # skipped.
                torch.nn.utils.clip_grad_value_(model.parameters(), MAX_NORM)
                torch.nn.utils.clip_grad_value_(covariance_model.parameters(), MAX_NORM)
                torch.nn.utils.clip_grad_value_([T_init], T_MAX_NORM)
            except Exception:
                # Best effort: a failed backward pass skips this sample's
                # update instead of aborting training.
                print("no grad is backpropagated...")
            optimizer.step()
            T_optimizer.step()
            T_init.data = normalize_matrix_positive(T_init.data)
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%', T_loss=f'{T_loss:.3f}')

    average_loss = np.mean(train_losses)
    average_obj = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time, backward_time)
def surrogate_validate_portfolio(model, covariance_model, T, scheduler, T_scheduler, epoch, dataset, training_method='surrogate', device='cpu', evaluate=False):
    """Evaluate the surrogate portfolio pipeline on ``dataset`` and step the schedulers.

    The portfolio QP is solved in the reduced variable ``y`` with ``x = T @ y``,
    using the module-level ``solver`` ('qpth' or 'cvxpy'). No gradients are
    needed here, so the whole pass runs under ``torch.no_grad()``.

    Args:
        model: Module mapping features to predictions; column 0 of its
            output is used.
        covariance_model: Module called without arguments to produce the
            covariance estimate.
        T: Reparameterisation matrix; ``T.shape[1]`` is the size of the
            reduced variable.
        scheduler: Stepped with ``average_loss`` for ``'two-stage'`` and with
            ``-average_obj`` for the other methods.
        T_scheduler: Stepped with ``-average_obj`` for ``'surrogate'`` only.
        epoch: The schedulers are stepped only when ``epoch > 0``.
        dataset: Iterable of ``(features, covariance_mat, labels)``; only
            index 0 of each element is used (one sample per batch).
        training_method: ``'two-stage'``, ``'decision-focused'`` or
            ``'surrogate'``.
        device: Device the sample tensors are moved to.
        evaluate: Not used by this function.

    Returns:
        ``(average_loss, average_obj)`` averaged over all samples.

    Raises:
        ValueError: If the module-level ``solver`` is not supported.
        TypeError: If ``epoch > 0`` and ``training_method`` is unknown.
    """
    model.eval()
    covariance_model.eval()
    loss_fn = torch.nn.MSELoss()
    validate_losses, validate_objs = [], []
    T_size = T.shape[1]
    # Differentiable QP layers keyed by the number of assets, so the cvxpy
    # problem is canonicalised once instead of once per sample.
    cvxpy_layers = {}

    with torch.no_grad(), tqdm.tqdm(dataset) as tqdm_loader:
        for features, covariance_mat, labels in tqdm_loader:
            # Each batch holds a single sample.
            features = features[0].to(device)
            covariance_mat = covariance_mat[0].to(device)
            labels = labels[0, :, 0].to(device).float()
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:, 0]
            loss = loss_fn(predictions, labels)
            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG

            p = predictions @ T
            if solver == 'qpth':
                G = -torch.eye(n) @ T
                h = torch.zeros(n)
                A = torch.ones(1, n) @ T
                b = torch.ones(1)
                qp_solver = qpth.qp.QPFunction()
                y = qp_solver(alpha * T.t() @ Q @ T, -p, G, h, A, b)[0]
            elif solver == 'cvxpy':
                cvxpylayer = cvxpy_layers.get(n)
                if cvxpylayer is None:
                    y_var = cp.Variable(T_size)
                    L_para = cp.Parameter((T_size, T_size))
                    p_para = cp.Parameter(T_size)
                    T_para = cp.Parameter((n, T_size))
                    constraints = [T_para @ y_var >= 0, cp.sum(T_para @ y_var) == 1]
                    objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ y_var) + p_para.T @ y_var)
                    problem = cp.Problem(objective, constraints)
                    cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para, T_para], variables=[y_var])
                    cvxpy_layers[n] = cvxpylayer
                # Square root of the reduced covariance, so the quadratic
                # term can be written as sum_squares(L @ y).
                L = sqrtm(T.t() @ Q @ T)
                y, = cvxpylayer(L, -p, T)
            else:
                raise ValueError(f'Unknown solver: {solver!r}')
            x = T @ y

            obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x

            validate_losses.append(loss.item())
            validate_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss = np.mean(validate_losses)
    average_obj = np.mean(validate_objs)

    if epoch > 0:
        if training_method == "two-stage":
            scheduler.step(average_loss)
        elif training_method == "decision-focused":
            scheduler.step(-average_obj)
        elif training_method == "surrogate":
            scheduler.step(-average_obj)
            T_scheduler.step(-average_obj)
        else:
            raise TypeError("Not Implemented Method")

    return average_loss, average_obj
def surrogate_test_portfolio(model, covariance_model, T, epoch, dataset, device='cpu', evaluate=False):
    """Evaluate the surrogate portfolio pipeline on ``dataset``.

    The portfolio QP is solved in the reduced variable ``y`` with ``x = T @ y``,
    using the module-level ``solver`` ('qpth' or 'cvxpy'). No gradients are
    needed here, so the whole pass runs under ``torch.no_grad()``.

    Args:
        model: Module mapping features to predictions; column 0 of its
            output is used.
        covariance_model: Module called without arguments to produce the
            covariance estimate.
        T: Reparameterisation matrix; ``T.shape[1]`` is the size of the
            reduced variable.
        epoch: Not used by this function.
        dataset: Iterable of ``(features, covariance_mat, labels)``; only
            index 0 of each element is used (one sample per batch).
        device: Device the sample tensors are moved to.
        evaluate: Not used by this function.

    Returns:
        ``(average_loss, average_obj)`` averaged over all samples.

    Raises:
        ValueError: If the module-level ``solver`` is not supported.
    """
    model.eval()
    covariance_model.eval()
    loss_fn = torch.nn.MSELoss()
    test_losses, test_objs = [], []
    T_size = T.shape[1]
    # Differentiable QP layers keyed by the number of assets, so the cvxpy
    # problem is canonicalised once instead of once per sample.
    cvxpy_layers = {}

    with torch.no_grad(), tqdm.tqdm(dataset) as tqdm_loader:
        for features, covariance_mat, labels in tqdm_loader:
            # Each batch holds a single sample.
            features = features[0].to(device)
            covariance_mat = covariance_mat[0].to(device)
            labels = labels[0, :, 0].to(device).float()
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:, 0]
            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG
            loss = loss_fn(predictions, labels)

            p = predictions @ T
            if solver == 'qpth':
                G = -torch.eye(n) @ T
                h = torch.zeros(n)
                A = torch.ones(1, n) @ T
                b = torch.ones(1)
                qp_solver = qpth.qp.QPFunction()
                y = qp_solver(alpha * T.t() @ Q @ T, -p, G, h, A, b)[0]
            elif solver == 'cvxpy':
                cvxpylayer = cvxpy_layers.get(n)
                if cvxpylayer is None:
                    y_var = cp.Variable(T_size)
                    L_para = cp.Parameter((T_size, T_size))
                    p_para = cp.Parameter(T_size)
                    T_para = cp.Parameter((n, T_size))
                    constraints = [T_para @ y_var >= 0, cp.sum(T_para @ y_var) == 1]
                    objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ y_var) + p_para.T @ y_var)
                    problem = cp.Problem(objective, constraints)
                    cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para, T_para], variables=[y_var])
                    cvxpy_layers[n] = cvxpylayer
                # Square root of the reduced covariance, so the quadratic
                # term can be written as sum_squares(L @ y).
                L = sqrtm(T.t() @ Q @ T)
                y, = cvxpylayer(L, -p, T)
            else:
                raise ValueError(f'Unknown solver: {solver!r}')
            x = T @ y

            obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x

            test_losses.append(loss.item())
            test_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss = np.mean(test_losses)
    average_obj = np.mean(test_objs)
    return average_loss, average_obj
# Parametrised linear program: minimise c @ x subject to A @ x == b and
# G @ x <= h. Every coefficient is a cvxpy Parameter so the solution can be
# differentiated with respect to it.
x = cp.Variable(D)
c = cp.Parameter(D)
A, b = cp.Parameter((M, D)), cp.Parameter(M)
G, h = cp.Parameter((N, D)), cp.Parameter(N)
objective = cp.Minimize(c @ x)
constraints = [A @ x == b, G @ x <= h]
problem = cp.Problem(objective, constraints)
# The problem is checked for DPP compliance before it is wrapped as a layer.
assert problem.is_dpp()

# In[10]:

cvxpylayer = CvxpyLayer(problem, variables=[x], parameters=[c, A, b, G, h])

# Wrap the numeric problem data as tensors that track gradients.
c_t, A_t, b_t, G_t, h_t = (
    torch.Tensor(data).requires_grad_(True) for data in (_c, _A, _b, _G, _h)
)

# Solve the problem; the layer returns one tensor per declared variable.
solution, = cvxpylayer(c_t, A_t, b_t, G_t, h_t)