def forward(self):
    # Covariance matrix of the learned per-security embeddings.
    security_embeddings = self.embedding(torch.LongTensor(range(self.n)))
    cov = computeCovariance(security_embeddings)
    return cov
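# `computeCovariance` is defined elsewhere in this repo. As a reading aid only,
# here is a minimal sketch of one plausible implementation -- the sample
# covariance of the rows, treating columns as observations -- under the
# assumption that it maps an (n, d) matrix to an (n, n) covariance matrix.
# The actual helper may differ.
def computeCovariance_sketch(embeddings):
    centered = embeddings - embeddings.mean(dim=1, keepdim=True)  # center each row
    return centered @ centered.t() / (embeddings.shape[1] - 1)    # (n, n) covariance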
def surrogate_train_submodular(net, init_T, optimizer, T_optimizer, epoch, sample_instance, dataset,
                               lr=0.1, training_method='two-stage', device='cpu'):
    net.train()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs, train_T_losses = [], [], []
    x_size, variable_size = init_T.shape
    n, m, d, f, budget = (sample_instance.n, sample_instance.m,
                          torch.Tensor(sample_instance.d), torch.Tensor(sample_instance.f),
                          sample_instance.budget)
    A, b, G, h = createSurrogateConstraintMatrix(m, n, budget)
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, labels) in enumerate(tqdm_loader):
            forward_start_time = time.time()
            features, labels = features.to(device), labels.to(device)
            if epoch >= 0:
                outputs = net(features)
            else:  # oracle pass: feed the true labels through the pipeline
                outputs = labels

            # two-stage loss
            loss = loss_fn(outputs, labels)
            forward_time += time.time() - forward_start_time

            # decision-focused loss
            objective_value_list, T_loss_list = [], []
            batch_size = len(labels)
            # alternatively, update only one randomly chosen column of T per batch,
            # as in surrogate_train_portfolio below
            T = init_T

            for (label, output) in zip(labels, outputs):
                if training_method == 'surrogate':
                    inference_start_time = time.time()
                    # end-to-end for both T and net
                    optimize_result = getSurrogateOptimalDecision(T, n, m, output, d, f, budget=budget)
                    inference_time += time.time() - inference_start_time
                    optimal_y = torch.Tensor(optimize_result.x)

                    qp_start_time = time.time()
                    if optimize_result.success:
                        # re-solve a regularized QP around the optimum to obtain gradients
                        newA, newb = torch.Tensor(), torch.Tensor()  # no equality constraints
                        newG = torch.cat((A @ T, G @ T, -torch.eye(variable_size)))
                        newh = torch.cat((b, h, torch.zeros(variable_size)))
                        Q = getSurrogateHessian(T, optimal_y, n, m, output, d, f).detach() \
                            + torch.eye(len(optimal_y)) * 10
                        jac = -getSurrogateDerivative(T, optimal_y, n, m, output, d, f, create_graph=True)
                        p = jac - Q @ optimal_y
                        qp_solver = qpth.qp.QPFunction()  # TODO unknown bug
                        try:
                            y = qp_solver(Q, p, newG, newh, newA, newb)[0]
                            x = T @ y
                        except Exception:
                            y = optimal_y
                            x = T.detach() @ optimal_y
                            print('qp error! no gradient!')
                        # a CVXPY/CvxpyLayer formulation of the same QP is used in the
                        # portfolio functions below
                    else:
                        print('Optimization failed...')
                        y = optimal_y
                        x = T.detach() @ optimal_y
                    qp_time += time.time() - qp_start_time
                else:
                    raise ValueError('Not implemented method!')

                obj = getObjective(x, n, m, label, d, f)
                objective_value_list.append(obj)
                T_loss_list.append(0)

            objective = sum(objective_value_list) / batch_size
            T_loss = torch.Tensor([0])

            optimizer.zero_grad()
            backward_start_time = time.time()
            try:
                if training_method == 'two-stage':
                    loss.backward()
                    optimizer.step()
                elif training_method == 'decision-focused':
                    (-objective).backward()
                    for parameter in net.parameters():
                        parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                    optimizer.step()
                elif training_method == 'surrogate':
                    # off-diagonal covariance between the columns of T (reparameterization loss)
                    covariance = computeCovariance(T.t())
                    T_loss = torch.sum(covariance) - torch.sum(torch.diag(covariance))
                    T_optimizer.zero_grad()
                    (-objective).backward()  # TODO: also minimize the reparameterization loss T_loss
                    for parameter in net.parameters():
                        parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                    init_T.grad = torch.clamp(init_T.grad, min=-MAX_NORM, max=MAX_NORM)
                    optimizer.step()
                    T_optimizer.step()
                    init_T.data = normalize_matrix_positive(init_T.data)
                else:
                    raise ValueError('Not implemented method')
            except Exception:
                print("Error! No grad is backpropagated...")
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(objective.item())
            train_T_losses.append(T_loss.item())

            average_loss = np.mean(train_losses)
            average_obj = np.mean(train_objs)
            average_T_loss = np.mean(train_T_losses)
            tqdm_loader.set_postfix(loss=f'{average_loss:.3f}', obj=f'{average_obj:.3f}',
                                    T_loss=f'{average_T_loss:.3f}')

    return average_loss, average_obj, (forward_time, inference_time, qp_time, backward_time)
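# `normalize_matrix_positive` is imported from elsewhere in this repo. A minimal
# sketch, under the assumption that it projects T back to a non-negative matrix
# with unit column sums (so T @ y stays a convex combination of the columns of T);
# the projection actually used may differ.
def normalize_matrix_positive_sketch(T, eps=1e-8):
    T = torch.clamp(T, min=0)                      # enforce non-negativity
    return T / (T.sum(dim=0, keepdim=True) + eps)  # normalize each column to sum to one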
def surrogate_test_portfolio(model, covariance_model, T, epoch, dataset, device='cpu', evaluate=False):
    model.eval()
    covariance_model.eval()
    loss_fn = torch.nn.MSELoss()
    test_losses, test_objs = [], []
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0
    T_size = T.shape[1]

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, covariance_mat, labels) in enumerate(tqdm_loader):
            forward_start_time = time.time()
            # batch size is one: strip the leading batch dimension
            features, covariance_mat, labels = (features[0].to(device),
                                                covariance_mat[0].to(device),
                                                labels[0, :, 0].to(device).float())
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:, 0]
            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG
            loss = loss_fn(predictions, labels)
            forward_time += time.time() - forward_start_time

            inference_start_time = time.time()
            p = predictions @ T
            L = sqrtm(T.t() @ Q @ T)

            if solver == 'qpth':
                # =============== solving QP using qpth ================
                G = -torch.eye(n) @ T
                h = torch.zeros(n)
                A = torch.ones(1, n) @ T
                b = torch.ones(1)
                qp_solver = qpth.qp.QPFunction()
                y = qp_solver(alpha * T.t() @ Q @ T, -p, G, h, A, b)[0]
                x = T @ y
            elif solver == 'cvxpy':
                # =============== solving QP using CVXPY ===============
                y_var = cp.Variable(T_size)
                L_para = cp.Parameter((T_size, T_size))
                p_para = cp.Parameter(T_size)
                T_para = cp.Parameter((n, T_size))
                constraints = [T_para @ y_var >= 0, cp.sum(T_para @ y_var) == 1]
                objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ y_var) + p_para.T @ y_var)
                problem = cp.Problem(objective, constraints)
                cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para, T_para], variables=[y_var])
                y, = cvxpylayer(L, -p, T)
                x = T @ y

            obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x
            inference_time += time.time() - inference_start_time

            test_losses.append(loss.item())
            test_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss = np.mean(test_losses)
    average_obj = np.mean(test_objs)
    return average_loss, average_obj
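# `sqrtm` here must be a *differentiable* symmetric matrix square root, since the
# CVXPY branch backpropagates through L (PyTorch has no built-in sqrtm). A minimal
# eigendecomposition-based sketch for symmetric PSD inputs; an assumption, not the
# repo's actual implementation:
def sqrtm_sketch(Q):
    eigvals, eigvecs = torch.linalg.eigh(Q)                   # Q = V diag(w) V^T
    sqrt_eigvals = torch.sqrt(torch.clamp(eigvals, min=0.0))  # clamp for numerical safety
    return eigvecs @ torch.diag(sqrt_eigvals) @ eigvecs.t()   # Q^{1/2}, symmetric
# With L symmetric, cp.sum_squares(L @ y) equals y.T @ Q @ y, matching the
# quadratic term alpha * T.t() @ Q @ T passed to qpth in the other branch.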
def test_portfolio(model, covariance_model, epoch, dataset, device='cpu', evaluate=False):
    model.eval()
    covariance_model.eval()
    loss_fn = torch.nn.MSELoss()
    test_losses, test_objs = [], []
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, covariance_mat, labels) in enumerate(tqdm_loader):
            forward_start_time = time.time()
            # batch size is one: strip the leading batch dimension
            features, covariance_mat, labels = (features[0].to(device),
                                                covariance_mat[0].to(device),
                                                labels[0, :, 0].to(device).float())
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            if epoch == -1:
                # oracle pass: use the true labels and the empirical covariance
                predictions = labels
                Q = Q_real
            else:
                predictions = model(features.float())[:, 0]
                Q = covariance_model() * (1 - REG) + torch.eye(n) * REG
            loss = loss_fn(predictions, labels)

            if evaluate:
                forward_time += time.time() - forward_start_time
                inference_start_time = time.time()
                p = predictions
                L = sqrtm(Q)

                if solver == 'qpth':
                    # =============== solving QP using qpth ================
                    G = -torch.eye(n)
                    h = torch.zeros(n)
                    A = torch.ones(1, n)
                    b = torch.ones(1)
                    qp_solver = qpth.qp.QPFunction()
                    x = qp_solver(alpha * Q, -p, G, h, A, b)[0]
                elif solver == 'cvxpy':
                    # =============== solving QP using CVXPY ===============
                    x_var = cp.Variable(n)
                    L_para = cp.Parameter((n, n))
                    p_para = cp.Parameter(n)
                    constraints = [x_var >= 0, x_var <= 1, cp.sum(x_var) == 1]
                    objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ x_var) + p_para.T @ x_var)
                    problem = cp.Problem(objective, constraints)
                    cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para], variables=[x_var])
                    x, = cvxpylayer(L, -p)

                obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x
                inference_time += time.time() - inference_start_time
            else:
                obj = torch.Tensor([0])

            test_losses.append(loss.item())
            test_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss = np.mean(test_losses)
    average_obj = np.mean(test_objs)
    return average_loss, average_obj  # , (forward_time, inference_time, qp_time, backward_time)
def surrogate_validate_portfolio(model, covariance_model, T, scheduler, T_scheduler, epoch, dataset,
                                 training_method='surrogate', device='cpu', evaluate=False):
    model.eval()
    covariance_model.eval()
    loss_fn = torch.nn.MSELoss()
    validate_losses, validate_objs = [], []
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0
    T_size = T.shape[1]

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, covariance_mat, labels) in enumerate(tqdm_loader):
            forward_start_time = time.time()
            # batch size is one: strip the leading batch dimension
            features, covariance_mat, labels = (features[0].to(device),
                                                covariance_mat[0].to(device),
                                                labels[0, :, 0].to(device).float())
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:, 0]
            loss = loss_fn(predictions, labels)
            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG
            forward_time += time.time() - forward_start_time

            inference_start_time = time.time()
            p = predictions @ T
            L = sqrtm(T.t() @ Q @ T)

            if solver == 'qpth':
                # =============== solving QP using qpth ================
                G = -torch.eye(n) @ T
                h = torch.zeros(n)
                A = torch.ones(1, n) @ T
                b = torch.ones(1)
                qp_solver = qpth.qp.QPFunction()
                y = qp_solver(alpha * T.t() @ Q @ T, -p, G, h, A, b)[0]
                x = T @ y
            elif solver == 'cvxpy':
                # =============== solving QP using CVXPY ===============
                y_var = cp.Variable(T_size)
                L_para = cp.Parameter((T_size, T_size))
                p_para = cp.Parameter(T_size)
                T_para = cp.Parameter((n, T_size))
                constraints = [T_para @ y_var >= 0, cp.sum(T_para @ y_var) == 1]
                objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ y_var) + p_para.T @ y_var)
                problem = cp.Problem(objective, constraints)
                cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para, T_para], variables=[y_var])
                y, = cvxpylayer(L, -p, T)
                x = T @ y

            obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x
            inference_time += time.time() - inference_start_time

            validate_losses.append(loss.item())
            validate_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss = np.mean(validate_losses)
    average_obj = np.mean(validate_objs)

    # step the learning-rate schedulers on the validation metric
    if epoch > 0:
        if training_method == 'two-stage':
            scheduler.step(average_loss)
        elif training_method == 'decision-focused':
            scheduler.step(-average_obj)
        elif training_method == 'surrogate':
            scheduler.step(-average_obj)
            T_scheduler.step(-average_obj)
        else:
            raise ValueError('Not implemented method')

    return average_loss, average_obj
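# The validation metric is passed to scheduler.step(...), so `scheduler` and
# `T_scheduler` are presumably torch.optim.lr_scheduler.ReduceLROnPlateau
# instances (its .step() is the one that accepts a metric). Illustrative setup,
# not from this repo:
#
#     scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
#         optimizer, mode='min', factor=0.5, patience=10)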
def surrogate_train_portfolio(model, covariance_model, T_init, optimizer, T_optimizer, epoch, dataset,
                              training_method='surrogate', device='cpu', evaluate=False):
    model.train()
    covariance_model.train()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs = [], []
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0
    T_size = T_init.shape[1]

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, covariance_mat, labels) in enumerate(tqdm_loader):
            forward_start_time = time.time()
            # batch size is one: strip the leading batch dimension
            features, covariance_mat, labels = (features[0].to(device),
                                                covariance_mat[0].to(device),
                                                labels[0, :, 0].to(device).float())
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:, 0]
            loss = loss_fn(predictions, labels)

            # randomly select one column of T to update: only that column keeps
            # a gradient path back to T_init
            T = T_init.detach().clone()
            random_column = torch.randint(T_init.shape[1], [1])
            T[:, random_column] = T_init[:, random_column]

            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG
            forward_time += time.time() - forward_start_time

            inference_start_time = time.time()
            p = predictions @ T
            L = sqrtm(T.t() @ Q @ T)

            if solver == 'qpth':
                # =============== solving QP using qpth ================
                G = -torch.eye(n) @ T
                h = torch.zeros(n)
                A = torch.ones(1, n) @ T
                b = torch.ones(1)
                qp_solver = qpth.qp.QPFunction()
                y = qp_solver(alpha * T.t() @ Q @ T, -p, G, h, A, b)[0]
                x = T @ y
            elif solver == 'cvxpy':
                # =============== solving QP using CVXPY ===============
                y_var = cp.Variable(T_size)
                L_para = cp.Parameter((T_size, T_size))
                p_para = cp.Parameter(T_size)
                T_para = cp.Parameter((n, T_size))
                constraints = [T_para @ y_var >= 0, cp.sum(T_para @ y_var) == 1]
                objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ y_var) + p_para.T @ y_var)
                problem = cp.Problem(objective, constraints)
                cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para, T_para], variables=[y_var])
                y, = cvxpylayer(L, -p, T)
                x = T @ y

            obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x
            inference_time += time.time() - inference_start_time

            # ====================== back-prop =====================
            optimizer.zero_grad()
            T_optimizer.zero_grad()
            backward_start_time = time.time()
            try:
                if training_method == 'surrogate':
                    # penalize off-diagonal covariance between the columns of T
                    covariance = computeCovariance(T.t())
                    T_weight = 0.0
                    T_loss = torch.sum(covariance) - torch.sum(torch.diag(covariance))
                    (-obj + T_weight * T_loss).backward()
                    for parameter in model.parameters():
                        parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                    for parameter in covariance_model.parameters():
                        parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                    T_init.grad = torch.clamp(T_init.grad, min=-T_MAX_NORM, max=T_MAX_NORM)
                else:
                    raise ValueError('Not implemented method')
            except Exception:
                print("no grad is backpropagated...")
            optimizer.step()
            T_optimizer.step()
            T_init.data = normalize_matrix_positive(T_init.data)
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%',
                                    T_loss=f'{T_loss.item():.3f}')

    average_loss = np.mean(train_losses)
    average_obj = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time, backward_time)
def train_portfolio(model, covariance_model, optimizer, epoch, dataset,
                    training_method='two-stage', device='cpu', evaluate=False):
    model.train()
    covariance_model.train()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs = [], []
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, covariance_mat, labels) in enumerate(tqdm_loader):
            forward_start_time = time.time()
            # batch size is one: strip the leading batch dimension
            features, covariance_mat, labels = (features[0].to(device),
                                                covariance_mat[0].to(device),
                                                labels[0, :, 0].to(device).float())
            n = len(covariance_mat)
            Q_real = computeCovariance(covariance_mat) * (1 - REG) + torch.eye(n) * REG
            predictions = model(features.float())[:, 0]
            loss = loss_fn(predictions, labels)
            Q = covariance_model() * (1 - REG) + torch.eye(n) * REG  # TODO

            # note: decision-focused training needs evaluate=True so that obj
            # carries gradients back through the QP layer
            if evaluate:
                forward_time += time.time() - forward_start_time
                inference_start_time = time.time()
                p = predictions
                L = sqrtm(Q)

                if solver == 'qpth':
                    # =============== solving QP using qpth ================
                    G = -torch.eye(n)
                    h = torch.zeros(n)
                    A = torch.ones(1, n)
                    b = torch.ones(1)
                    qp_solver = qpth.qp.QPFunction()
                    x = qp_solver(alpha * Q, -p, G, h, A, b)[0]
                elif solver == 'cvxpy':
                    # =============== solving QP using CVXPY ===============
                    x_var = cp.Variable(n)
                    L_para = cp.Parameter((n, n))
                    p_para = cp.Parameter(n)
                    constraints = [x_var >= 0, x_var <= 1, cp.sum(x_var) == 1]
                    objective = cp.Minimize(0.5 * alpha * cp.sum_squares(L_para @ x_var) + p_para.T @ x_var)
                    problem = cp.Problem(objective, constraints)
                    cvxpylayer = CvxpyLayer(problem, parameters=[L_para, p_para], variables=[x_var])
                    x, = cvxpylayer(L, -p)

                obj = labels @ x - 0.5 * alpha * x.t() @ Q_real @ x
                inference_time += time.time() - inference_start_time
            else:
                obj = torch.Tensor([0])

            # ====================== back-prop =====================
            optimizer.zero_grad()
            backward_start_time = time.time()
            try:
                if training_method == 'two-stage':
                    # also fit the learned covariance to the empirical one
                    Q_loss = torch.norm(Q - Q_real)
                    (loss + Q_loss).backward()
                elif training_method == 'decision-focused':
                    (-obj).backward()
                    for parameter in model.parameters():
                        parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                    for parameter in covariance_model.parameters():
                        parameter.grad = torch.clamp(parameter.grad, min=-MAX_NORM, max=MAX_NORM)
                else:
                    raise ValueError('Not implemented method')
            except Exception:
                print("no grad is backpropagated...")
            optimizer.step()
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(obj.item())
            tqdm_loader.set_postfix(loss=f'{loss.item():.6f}', obj=f'{obj.item()*100:.6f}%')

    average_loss = np.mean(train_losses)
    average_obj = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time, backward_time)
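# Hypothetical driver showing how these routines fit together; every name below
# (num_epochs, train_dataset, test_dataset, ...) is illustrative, not from this repo:
#
#     test_portfolio(model, covariance_model, epoch=-1, dataset=test_dataset,
#                    evaluate=True)  # oracle baseline using true labels and Q_real
#     for epoch in range(num_epochs):
#         train_portfolio(model, covariance_model, optimizer, epoch, train_dataset,
#                         training_method='decision-focused', evaluate=True)
#         test_portfolio(model, covariance_model, epoch, test_dataset, evaluate=True)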