Esempio n. 1
0
def test_submodular(net,
                    epoch,
                    sample_instance,
                    dataset,
                    device='cpu',
                    evaluate=True):
    """Evaluate `net` on `dataset` for one pass.

    Computes the two-stage MSE between predictions and labels and, when
    `evaluate` is True, the decision-focused objective obtained by solving
    the downstream optimization on each prediction and scoring the
    resulting decision against the true label.

    Returns:
        (average_loss, average_obj): means over all processed batches.
    """
    net.eval()
    criterion = torch.nn.MSELoss()
    loss_history, obj_history = [], []
    n, m = sample_instance.n, sample_instance.m
    d = torch.Tensor(sample_instance.d)
    f = torch.Tensor(sample_instance.f)
    budget = sample_instance.budget
    A, b, G, h = createConstraintMatrix(m, n, budget)

    with tqdm.tqdm(dataset) as progress:
        for batch_idx, (features, labels) in enumerate(progress):
            features, labels = features.to(device), labels.to(device)
            # Warm-up convention: before epoch 0 the ground truth stands in
            # for the network output.
            predictions = net(features) if epoch >= 0 else labels

            # two-stage loss
            loss = criterion(predictions, labels)

            # decision-focused loss
            batch_objs = []
            for label, prediction in zip(labels, predictions):
                if evaluate:
                    # Optimize against the prediction, score against the truth.
                    result = getOptimalDecision(n,
                                                m,
                                                prediction,
                                                d,
                                                f,
                                                budget=budget)
                    decision = torch.Tensor(result.x)
                    batch_objs.append(getObjective(decision, n, m, label, d, f))
                else:
                    batch_objs.append(torch.Tensor([0]))
            objective = sum(batch_objs) / len(labels)

            loss_history.append(loss.item())
            obj_history.append(objective.item())

            average_loss = np.mean(loss_history)
            average_obj = np.mean(obj_history)
            progress.set_postfix(loss=f'{average_loss:.3f}',
                                 obj=f'{average_obj:.3f}')

    return np.mean(loss_history), np.mean(obj_history)
        T_optimizer = torch.optim.Adam([T], lr=T_lr)

    optimize_result = getOptimalDecision(n,
                                         m,
                                         torch.Tensor(sample_instance.c),
                                         sample_instance.d,
                                         sample_instance.f,
                                         budget=budget)
    optimal_x = torch.Tensor(optimize_result.x)

    xx = torch.autograd.Variable(optimal_x, requires_grad=True)
    d, f = sample_instance.d, sample_instance.f
    c = torch.Tensor(
        sample_instance.c
    )  # torch.autograd.Variable(torch.Tensor(sample_instance.c), requires_grad=True)
    obj = getObjective(xx, n, m, c, d, f)
    jac_torch = torch.autograd.grad(obj, xx)
    jac_manual = getManualDerivative(xx.detach(), n, m, c, d, f)
    print('torch grad:', jac_torch)
    print('hand grad:', jac_manual)
    hessian = getHessian(optimal_x, n, m, torch.Tensor(c), d, f)

    num_epochs = 20
    train_loss_list, train_obj_list, train_opt_list = [], [], []
    test_loss_list, test_obj_list, test_opt_list = [], [], []
    for epoch in range(-1, num_epochs):
        if training_method == 'surrogate':
            if epoch == -1:
                print('Not training in the first epoch...')
                train_loss, train_obj, train_opt = surrogate_train_submodular(
                    net,
Esempio n. 3
0
def train_submodular(net,
                     optimizer,
                     epoch,
                     sample_instance,
                     dataset,
                     lr=0.1,
                     training_method='two-stage',
                     device='cpu',
                     evaluate=True):
    """Train `net` for one epoch on `dataset`.

    Supports two methods:
      * 'two-stage': backprop the MSE between predictions and labels; the
        downstream objective is computed only for reporting (if `evaluate`).
      * 'decision-focused': solve the downstream optimization on each
        prediction, differentiate through a local QP approximation (qpth),
        and backprop a 50/50 blend of objective and MSE.

    Args:
        net: predictive model (trained in place).
        optimizer: torch optimizer over `net.parameters()`.
        epoch: current epoch index; if negative, labels are used as outputs
            (warm-up pass without the network).
        sample_instance: problem instance providing n, m, d, f, budget.
        dataset: iterable of (features, labels) batches.
        lr: unused here; kept for signature compatibility with callers.
        training_method: 'two-stage' or 'decision-focused'.
        device: torch device string.
        evaluate: in two-stage mode, whether to solve the downstream
            optimization for reporting.

    Returns:
        (average_loss, average_obj,
         (forward_time, inference_time, qp_time, backward_time))

    Raises:
        ValueError: if `training_method` is not recognized (note: during the
            backward phase the error is caught and reported, matching the
            original best-effort behavior).
    """
    net.train()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs = [], []
    n, m = sample_instance.n, sample_instance.m
    d, f = torch.Tensor(sample_instance.d), torch.Tensor(sample_instance.f)
    budget = sample_instance.budget
    A, b, G, h = createConstraintMatrix(m, n, budget)
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0
    REG = 0.0  # regularization weight forwarded to the objective helpers

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, labels) in enumerate(tqdm_loader):
            net_start_time = time.time()
            features, labels = features.to(device), labels.to(device)
            # Warm-up convention: before epoch 0, the ground truth stands in
            # for the network output.
            if epoch >= 0:
                outputs = net(features)
            else:
                outputs = labels
            # two-stage loss
            loss = loss_fn(outputs, labels)
            forward_time += time.time() - net_start_time

            # decision-focused loss
            objective_value_list = []
            batch_size = len(labels)
            for (label, output) in zip(labels, outputs):
                if training_method == 'decision-focused':
                    inference_start_time = time.time()
                    # Keep the best of several random restarts (currently 1).
                    min_fun = -np.inf
                    for _ in range(1):
                        tmp_result = getOptimalDecision(n,
                                                        m,
                                                        output,
                                                        d,
                                                        f,
                                                        budget=budget,
                                                        REG=REG)
                        if tmp_result.fun > min_fun:
                            optimize_result = tmp_result
                            min_fun = tmp_result.fun
                    inference_time += time.time() - inference_start_time
                    optimal_x = torch.Tensor(optimize_result.x)
                    # BUG FIX: start the QP timer before branching. Previously
                    # it was set only on the success path, so a failed
                    # optimization hit an undefined name (or stale timestamp)
                    # at the `qp_time +=` below.
                    qp_start_time = time.time()
                    if optimize_result.success:
                        # Differentiable QP around the local optimum:
                        # Q = Hessian + 10*I keeps the problem strongly convex
                        # even though the original objective is not.
                        Q = getHessian(optimal_x, n, m, output, d, f,
                                       REG=REG) + torch.eye(n) * 10
                        jac = -getDerivative(optimal_x,
                                             n,
                                             m,
                                             output,
                                             d,
                                             f,
                                             create_graph=True,
                                             REG=REG)
                        # Linear term chosen so the QP's optimum matches
                        # optimal_x at the current gradient.
                        p = jac - Q @ optimal_x
                        qp_solver = qpth.qp.QPFunction()
                        x = qp_solver(Q, p, G, h, A, b)[0]
                    else:
                        print('Optimization failed...')
                        x = optimal_x
                    # Score the (differentiable) decision on the true label.
                    obj = getObjective(x, n, m, label, d, f, REG=0)
                    qp_time += time.time() - qp_start_time

                elif training_method == 'two-stage':
                    if evaluate:
                        inference_start_time = time.time()
                        optimize_result = getOptimalDecision(n,
                                                             m,
                                                             output,
                                                             d,
                                                             f,
                                                             budget=budget,
                                                             REG=REG)
                        x = torch.Tensor(optimize_result.x)
                        obj = getObjective(x, n, m, label, d, f, REG=0)
                        inference_time += time.time() - inference_start_time
                        qp_time = 0
                    else:
                        obj = torch.Tensor([0])
                        qp_time = 0
                else:
                    raise ValueError('Not implemented method!')

                objective_value_list.append(obj)
            objective = sum(objective_value_list) / batch_size

            optimizer.zero_grad()
            backward_start_time = time.time()
            try:
                if training_method == 'two-stage':
                    loss.backward()
                elif training_method == 'decision-focused':
                    # Maximize the objective while regularizing with the
                    # prediction loss; clip per-parameter grads for stability.
                    (-objective * 0.5 + loss * 0.5).backward()  # TODO
                    for parameter in net.parameters():
                        parameter.grad = torch.clamp(parameter.grad,
                                                     min=-MAX_NORM,
                                                     max=MAX_NORM)
                else:
                    raise ValueError('Not implemented method')
            except Exception:
                # qpth can occasionally yield a disconnected graph; skip the
                # update for this batch instead of aborting the epoch.
                print("no grad is backpropagated...")
            optimizer.step()
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(objective.item())

            average_loss = np.mean(train_losses)
            average_obj = np.mean(train_objs)
            # Print status
            tqdm_loader.set_postfix(loss=f'{average_loss:.6f}',
                                    obj=f'{average_obj:.6f}')

    average_loss = np.mean(train_losses)
    average_obj = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time,
                                       backward_time)
Esempio n. 4
0
def surrogate_train_submodular(net,
                               init_T,
                               optimizer,
                               T_optimizer,
                               epoch,
                               sample_instance,
                               dataset,
                               lr=0.1,
                               training_method='two-stage',
                               device='cpu'):
    """Train `net` (and the reparameterization matrix `init_T`) for one epoch.

    The decision variable is reparameterized as x = T @ y with
    T of shape (x_size, variable_size); the surrogate problem is solved
    over the lower-dimensional y and differentiated through a local QP
    (qpth), updating both the network and T end-to-end.

    Args:
        net: predictive model (trained in place).
        init_T: reparameterization matrix, a leaf tensor updated via
            `T_optimizer` and re-normalized each batch.
        optimizer: torch optimizer over `net.parameters()`.
        T_optimizer: torch optimizer over `init_T`.
        epoch: current epoch; if negative, labels are used as outputs.
        sample_instance: problem instance providing n, m, d, f, budget.
        dataset: iterable of (features, labels) batches.
        lr: unused here; kept for signature compatibility with callers.
        training_method: only 'surrogate' reaches the inner loop; the
            backward phase also has 'two-stage'/'decision-focused' branches.
        device: torch device string.

    Returns:
        (average_loss, average_obj,
         (forward_time, inference_time, qp_time, backward_time))
    """
    net.train()
    # loss_fn = torch.nn.BCELoss()
    loss_fn = torch.nn.MSELoss()
    train_losses, train_objs, train_T_losses = [], [], []
    x_size, variable_size = init_T.shape
    n, m, d, f, budget = sample_instance.n, sample_instance.m, torch.Tensor(
        sample_instance.d), torch.Tensor(
            sample_instance.f), sample_instance.budget
    A, b, G, h = createSurrogateConstraintMatrix(m, n, budget)
    # Cumulative wall-clock breakdown returned to the caller.
    forward_time, inference_time, qp_time, backward_time = 0, 0, 0, 0

    with tqdm.tqdm(dataset) as tqdm_loader:
        for batch_idx, (features, labels) in enumerate(tqdm_loader):
            forward_start_time = time.time()
            features, labels = features.to(device), labels.to(device)
            # Warm-up convention: before epoch 0 the ground truth stands in
            # for the network output.
            if epoch >= 0:
                outputs = net(features)
            else:
                outputs = labels
            # two-stage loss
            loss = loss_fn(outputs, labels)
            forward_time += time.time() - forward_start_time

            # decision-focused loss
            objective_value_list, T_loss_list = [], []
            batch_size = len(labels)

            # randomly select column to update
            T = init_T
            # T = init_T.detach().clone()
            # random_column = torch.randint(init_T.shape[1], [1])
            # T[:,random_column] = init_T[:,random_column]

            # if batch_idx == 0:
            #     plot_graph(labels.detach().numpy(), T.detach().numpy(), epoch)

            for (label, output) in zip(labels, outputs):
                if training_method == 'surrogate':
                    # output = label # for debug only # TODO
                    inference_start_time = time.time()
                    optimize_result = getSurrogateOptimalDecision(
                        T, n, m, output, d, f,
                        budget=budget)  # end-to-end for both T and net
                    inference_time += time.time() - inference_start_time
                    optimal_y = torch.Tensor(optimize_result.x)

                    qp_start_time = time.time()
                    if optimize_result.success:
                        optimal_y = torch.Tensor(optimize_result.x)
                        # Constraints are mapped into y-space through T;
                        # the -I block enforces y >= 0.
                        newA, newb = torch.Tensor(), torch.Tensor()
                        newG = torch.cat(
                            (A @ T, G @ T, -torch.eye(variable_size)))
                        newh = torch.cat((b, h, torch.zeros(variable_size)))
                        # newG = torch.cat((A @ T, G @ T, -torch.eye(variable_size), torch.eye(variable_size)))
                        # newh = torch.cat((b, h, torch.zeros(variable_size), torch.ones(variable_size)))

                        # Q = Hessian + 10*I keeps the QP strongly convex even
                        # though the underlying objective is not.
                        Q = getSurrogateHessian(
                            T, optimal_y, n, m, output, d,
                            f).detach() + torch.eye(len(optimal_y)) * 10
                        L = torch.cholesky(Q)
                        jac = -getSurrogateDerivative(T,
                                                      optimal_y,
                                                      n,
                                                      m,
                                                      output,
                                                      d,
                                                      f,
                                                      create_graph=True)
                        # Linear term chosen so the QP's optimum matches
                        # optimal_y at the current gradient.
                        p = jac - Q @ optimal_y
                        qp_solver = qpth.qp.QPFunction()  # TODO unknown bug

                        try:
                            y = qp_solver(Q, p, newG, newh, newA, newb)[0]
                            x = T @ y
                        except:
                            # Fall back to the non-differentiable solution;
                            # T.detach() blocks gradients through T too.
                            y = optimal_y
                            x = T.detach() @ optimal_y
                            print('qp error! no gradient!')

                        # if True:
                        #     # =============== solving QP using CVXPY ===============
                        #     y_default = cp.Variable(variable_size)
                        #     G_default, h_default = cp.Parameter(newG.shape), cp.Parameter(newh.shape)
                        #     L_default = cp.Parameter((variable_size, variable_size))
                        #     p_default = cp.Parameter(variable_size)
                        #     constraints = [G_default @ y_default <= h_default]
                        #     objective = cp.Minimize(0.5 * cp.sum_squares(L_default @ y_default) + p_default.T @ y_default)
                        #     problem = cp.Problem(objective, constraints)

                        #     cvxpylayer = CvxpyLayer(problem, parameters=[G_default, h_default, L_default, p_default], variables=[y_default])
                        #     coverage_qp_solution, = cvxpylayer(newG, newh, L, p)
                        #     y = coverage_qp_solution
                        #     x = T @ y

                        # time test...
                        # time_test_start = time.time()
                        # for i in range(20):
                        #    _ = getDerivative(x, n, m, output, d, f)
                        # print('original gradient time:', time.time() - time_test_start)

                        # time_test_start = time.time()
                        # for i in range(20):
                        #    _ = getSurrogateDerivative(T, y, n, m, output, d, f, create_graph=False)
                        # print('surrogate gradient time:', time.time() - time_test_start)

                        # except:
                        #     print("CVXPY solver fails... Usually because Q is not PSD")
                        #     y = optimal_y
                        #     x = T.detach() @ optimal_y

                    else:  # torch.norm(y.detach() - optimal_y) > 0.05: # TODO
                        print('Optimization failed...')
                        y = optimal_y
                        x = T.detach() @ optimal_y

                    qp_time += time.time() - qp_start_time
                else:
                    raise ValueError('Not implemented method!')

                # Score the decision (in x-space) against the true label.
                obj = getObjective(x, n, m, label, d, f)
                tmp_T_loss = 0  # torch.sum((projected_real_optimal_x - real_optimal_x) ** 2).item()

                objective_value_list.append(obj)
                T_loss_list.append(tmp_T_loss)

            # print(pairwise_distances(T.t().detach().numpy()))
            objective = sum(objective_value_list) / batch_size
            T_loss = torch.Tensor([0])
            # print('objective', objective)

            optimizer.zero_grad()
            backward_start_time = time.time()
            try:
                if training_method == 'two-stage':
                    loss.backward()
                    optimizer.step()
                elif training_method == 'decision-focused':
                    (-objective).backward()
                    for parameter in net.parameters():
                        parameter.grad = torch.clamp(parameter.grad,
                                                     min=-MAX_NORM,
                                                     max=MAX_NORM)
                    optimizer.step()
                elif training_method == 'surrogate':
                    # T_loss penalizes off-diagonal covariance between the
                    # columns of T (redundant directions); currently reported
                    # but not backpropagated (see commented line below).
                    covariance = computeCovariance(T.t())
                    T_loss = torch.sum(covariance) - torch.sum(
                        torch.diag(covariance))
                    T_optimizer.zero_grad()
                    (-objective).backward()
                    # T_loss.backward() # TODO: minimizing reparameterization loss

                    # Clip gradients of both the network and T for stability.
                    for parameter in net.parameters():
                        parameter.grad = torch.clamp(parameter.grad,
                                                     min=-MAX_NORM,
                                                     max=MAX_NORM)
                    init_T.grad = torch.clamp(init_T.grad,
                                              min=-MAX_NORM,
                                              max=MAX_NORM)
                    optimizer.step()
                    T_optimizer.step()
                    # Project T back onto the feasible (normalized, positive)
                    # set after the gradient step.
                    init_T.data = normalize_matrix_positive(init_T.data)
                else:
                    raise ValueError('Not implemented method')
            except:
                # Best-effort: a failed backward (e.g. disconnected graph from
                # the QP fallback) skips the update for this batch.
                print("Error! No grad is backpropagated...")
                pass
            backward_time += time.time() - backward_start_time

            train_losses.append(loss.item())
            train_objs.append(objective.item())
            train_T_losses.append(T_loss.item())

            average_loss = np.mean(train_losses)
            average_obj = np.mean(train_objs)
            average_T_loss = np.mean(train_T_losses)
            # Print status
            # tqdm_loader.set_postfix(loss=f'{average_loss:.3f}', obj=f'{average_obj:.3f}')
            tqdm_loader.set_postfix(loss=f'{average_loss:.3f}',
                                    obj=f'{average_obj:.3f}',
                                    T_loss=f'{average_T_loss:.3f}')

    average_loss = np.mean(train_losses)
    average_obj = np.mean(train_objs)
    return average_loss, average_obj, (forward_time, inference_time, qp_time,
                                       backward_time)
Esempio n. 5
0
def getSurrogateObjective(T, y, n, m, c, d, f, REG=0):
    """Evaluate the original objective at the surrogate point x = T @ y.

    T maps the low-dimensional surrogate variable y back to the full
    decision space; all remaining arguments are forwarded unchanged to
    `getObjective`.

    Returns:
        The value of `getObjective(T @ y, n, m, c, d, f, REG=REG)`.
    """
    # Removed an unused `start_time = time.time()` left over from profiling.
    x = T @ y
    return getObjective(x, n, m, c, d, f, REG=REG)