Code Example #1
File: double_net.py  Project: urolyi1/MechanismDesign
def test_loop(model, loader, args, device='cpu'):
    # regrets and payments are 2d: n_samples x n_agents.
    test_regrets = torch.Tensor().to(device)
    test_payments = torch.Tensor().to(device)

    for i, batch in tqdm(enumerate(loader)):
        batch = batch.to(device)
        misreport_batch = batch.clone().detach()
        utils.optimize_misreports(model,
                                  batch,
                                  misreport_batch,
                                  misreport_iter=args.test_misreport_iter,
                                  lr=args.misreport_lr)

        allocs, payments = model(batch)
        truthful_util = utils.calc_agent_util(batch, allocs, payments)
        misreport_util = utils.tiled_misreport_util(misreport_batch, batch,
                                                    model)

        regrets = misreport_util - truthful_util
        positive_regrets = torch.clamp_min(regrets, 0)

        # Record entire test data
        test_regrets = torch.cat((test_regrets, positive_regrets), dim=0)
        test_payments = torch.cat((test_payments, payments), dim=0)

    mean_regret = test_regrets.sum(dim=1).mean(dim=0).item()
    result = {
        "payment_mean": test_payments.sum(dim=1).mean(dim=0).item(),
        "regret_mean": mean_regret,
        "regret_max": test_regrets.sum(dim=1).max().item(),
    }
    return result
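
The loop above only needs two fields from `args`: `test_misreport_iter` and `misreport_lr`. A minimal, hypothetical way to drive it is sketched below; the model and loader are assumed to come from the project's own setup code, and the numeric values are placeholders rather than the project's defaults.

from argparse import Namespace

# Hypothetical usage sketch. Only the attribute names and result keys are
# taken from test_loop above; everything else is illustrative.
args = Namespace(test_misreport_iter=1000, misreport_lr=0.1)
results = test_loop(model, test_loader, args, device='cpu')
print(results)  # {'payment_mean': ..., 'regret_mean': ..., 'regret_max': ...}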
Code Example #2
File: double_net.py  Project: urolyi1/MechanismDesign
def train_loop_no_lagrange(model, train_loader, args, device='cpu'):
    payment_mult = 1
    optimizer = optim.Adam(model.parameters(), lr=args.model_lr)

    iter = 0
    for epoch in tqdm(range(args.num_epochs)):
        regrets_epoch = torch.Tensor().to(device)
        payments_epoch = torch.Tensor().to(device)

        for i, batch in enumerate(train_loader):
            iter += 1
            batch = batch.to(device)
            misreport_batch = batch.clone().detach().to(device)
            utils.optimize_misreports(model,
                                      batch,
                                      misreport_batch,
                                      misreport_iter=args.misreport_iter,
                                      lr=args.misreport_lr)

            allocs, payments = model(batch)
            truthful_util = utils.calc_agent_util(batch, allocs, payments)
            misreport_util = utils.tiled_misreport_util(
                misreport_batch, batch, model)
            regrets = misreport_util - truthful_util
            positive_regrets = torch.clamp_min(regrets, 0)

            payment_loss = payments.sum(dim=1).mean() * payment_mult

            if epoch < args.rgt_start:
                regret_loss = 0
            else:
                regret_loss = torch.sqrt(
                    positive_regrets.mean()) + positive_regrets.mean()

            # Add batch to epoch stats
            regrets_epoch = torch.cat((regrets_epoch, regrets), dim=0)
            payments_epoch = torch.cat((payments_epoch, payments), dim=0)

            # Calculate loss
            loss_func = regret_loss - payment_loss

            # update model
            optimizer.zero_grad()
            loss_func.backward()
            optimizer.step()

        # Log training stats
        train_stats = {
            "regret_max": regrets_epoch.max().item(),
            "regret_mean": regrets_epoch.mean().item(),
            "payment": payments_epoch.sum(dim=1).mean().item(),
        }
        print(train_stats)
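
Both the test and training loops delegate the inner maximisation to `utils.optimize_misreports`, whose body is not shown on this page. The sketch below is one plausible implementation, assuming it performs plain gradient ascent on the misreport tensor via the same `tiled_misreport_util` helper; it is an illustration, not the project's actual code.

def optimize_misreports_sketch(model, current_batch, misreport_batch,
                               misreport_iter=25, lr=1e-1):
    # Gradient ascent on the misreports: each agent adjusts its reported
    # valuations to maximise its own utility under the current mechanism.
    for _ in range(misreport_iter):
        misreport_batch.requires_grad_(True)
        model.zero_grad()
        misreport_util = utils.tiled_misreport_util(misreport_batch,
                                                    current_batch, model)
        misreport_util.sum().backward()
        with torch.no_grad():
            misreport_batch += lr * misreport_batch.grad
            misreport_batch.grad = None
    # the loops above reuse the tensor in-place, so detach it again before returning
    misreport_batch.requires_grad_(False)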
Code Example #3
def test_loop(model, loader, args, device='cpu'):
    # regrets and payments are 2d: n_samples x n_agents.
    test_regrets = torch.Tensor().to(device)
    test_payments = torch.Tensor().to(device)

    for i, batch in enumerate(loader):
        batch = batch.to(device)
        misreport_batch = batch.clone().detach()
        optimize_misreports(model,
                            batch,
                            misreport_batch,
                            misreport_iter=args.test_misreport_iter,
                            lr=args.misreport_lr)

        allocs, payments = model(batch)
        truthful_util = calc_agent_util(batch, allocs, payments)
        misreport_util = tiled_misreport_util(misreport_batch, batch, model)

        regrets = misreport_util - truthful_util
        positive_regrets = torch.clamp_min(regrets, 0)

        # Record entire test data
        test_regrets = torch.cat((test_regrets, positive_regrets), dim=0)
        test_payments = torch.cat((test_payments, payments), dim=0)

    mean_regret = test_regrets.sum(dim=1).mean(dim=0).item()
    result = {
        "payment_mean": test_payments.sum(dim=1).mean(dim=0).item(),
        # "regret_std": regret_var ** .5,
        "regret_mean": mean_regret,
        "regret_max": test_regrets.sum(dim=1).max().item(),
    }
    # for i in range(model.n_agents):
    #     agent_regrets = test_regrets[:, i]
    #     result[f"regret_agt{i}_std"] = (((agent_regrets ** 2).mean() - agent_regrets.mean() ** 2) ** .5).item()
    #     result[f"regret_agt{i}_mean"] = agent_regrets.mean().item()
    return result
Code Example #4
def train_loop(model, train_loader, args, device="cpu"):
    regret_mults = 5.0 * torch.ones((1, model.n_agents)).to(device)
    payment_mult = 1

    optimizer = optim.Adam(model.parameters(), lr=args.model_lr)

    iter = 0
    rho = args.rho

    # local_optimum_model = None

    for epoch in tqdm(range(args.num_epochs)):
        regrets_epoch = torch.Tensor().to(device)
        payments_epoch = torch.Tensor().to(device)

        for i, batch in enumerate(train_loader):
            iter += 1
            batch = batch.to(device)
            misreport_batch = batch.clone().detach().to(device)
            utils.optimize_misreports(model,
                                      batch,
                                      misreport_batch,
                                      misreport_iter=args.misreport_iter,
                                      lr=args.misreport_lr)

            allocs, payments = model(batch)
            truthful_util = utils.calc_agent_util(batch, allocs, payments)
            misreport_util = utils.tiled_misreport_util(
                misreport_batch, batch, model)
            regrets = misreport_util - truthful_util
            positive_regrets = torch.clamp_min(regrets, 0)

            payment_loss = payments.sum(dim=1).mean() * payment_mult

            if epoch < args.rgt_start:
                regret_loss = 0
                regret_quad = 0
            else:
                regret_loss = (regret_mults * positive_regrets).mean()
                regret_quad = (rho / 2.0) * (positive_regrets**2).mean()
                # regret_loss = (regret_mults * (positive_regrets + positive_regrets.max(dim=0).values) / 2).mean()
                # regret_quad = (rho / 2.0) * ((positive_regrets ** 2).mean() +
                #                              (positive_regrets.max(dim=0).values ** 2).mean()) / 2

            # Add batch to epoch stats
            regrets_epoch = torch.cat((regrets_epoch, regrets), dim=0)
            payments_epoch = torch.cat((payments_epoch, payments), dim=0)
            # price_of_fair_epoch = torch.cat((price_of_fair_epoch, price_of_fair), dim=0)

            # Calculate loss
            loss_func = regret_loss + regret_quad - payment_loss

            # update model
            optimizer.zero_grad()
            loss_func.backward()
            optimizer.step()

            # update various fancy multipliers
            # if epoch >= args.rgt_start:
            if iter % args.lagr_update_iter == 0:
                with torch.no_grad():
                    regret_mults += rho * positive_regrets.mean(dim=0)
            if iter % args.rho_incr_iter == 0:
                rho += args.rho_incr_amount
            # if epoch >= args.fair_start:
            # if local_optimum_model is None:
            #     local_optimum_model = RegretNet(args.n_agents, args.n_items, activation='relu',
            #                                     hidden_layer_size=args.hidden_layer_size,
            #                                     n_hidden_layers=args.n_hidden_layers,
            #                                     separate=args.separate).to(device)
            #     local_optimum_model.load_state_dict(model.state_dict())

        # Log training stats
        train_stats = {
            "regret_max": regrets_epoch.max().item(),
            # "regret_min": regrets_epoch.min().item(),
            "regret_mean": regrets_epoch.mean().item(),

            # "payment_max": payments_epoch.sum(dim=1).max().item(),
            # "payment_min": payments_epoch.sum(dim=1).min().item(),
            "payment": payments_epoch.sum(dim=1).mean().item(),

            # "fairprice_max": price_of_fair_epoch.max().item(),
            # "fairprice_min": price_of_fair_epoch.min().item(),
            # "fairprice_mean": price_of_fair_epoch.mean().item(),
        }
        print(train_stats)

        mult_stats = {
            "regret_mult": regret_mults.mean().item(),
            # "regret_rho": rho,
            "payment_mult": payment_mult,
            # "fair_rho": rho_fair
        }
        print(mult_stats)
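
This is the augmented-Lagrangian variant: `regret_loss` is the linear multiplier term, `regret_quad` the quadratic penalty, and both the multipliers and `rho` grow on fixed iteration schedules. For reference, a hypothetical `args` object covering every field the loop reads might look like the following; the values are placeholders, not the project's defaults.

from argparse import Namespace

args = Namespace(
    model_lr=1e-3,          # Adam learning rate for the mechanism network
    misreport_lr=1e-1,      # step size for the inner misreport optimisation
    misreport_iter=25,      # misreport steps per training batch
    num_epochs=100,
    rgt_start=0,            # epoch at which the regret terms switch on
    rho=1.0,                # initial quadratic-penalty weight
    rho_incr_iter=5000,     # iterations between rho increments
    rho_incr_amount=1.0,
    lagr_update_iter=100,   # iterations between Lagrange-multiplier updates
)
train_loop(model, train_loader, args, device='cpu')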
Code Example #5
File: double_net.py  Project: urolyi1/MechanismDesign
def train_loop_sinkhorn_decay(
    model,
    train_loader,
    args,
    device='cpu',
    decay_iter=1000,
    decay_mult=0.9,
):
    regret_mults = 5.0 * torch.ones((1, model.n_agents)).to(device)
    payment_mult = 1
    optimizer = optim.Adam(model.parameters(), lr=args.model_lr)

    iter = 0
    rho = args.rho

    mean_regrets = []
    mean_payments = []
    lagrange_mults = []
    for epoch in tqdm(range(args.num_epochs)):
        regrets_epoch = torch.Tensor().to(device)
        payments_epoch = torch.Tensor().to(device)

        for i, batch in enumerate(train_loader):
            iter += 1
            batch = batch.to(device)
            misreport_batch = batch.clone().detach().to(device)
            utils.optimize_misreports(model,
                                      batch,
                                      misreport_batch,
                                      misreport_iter=args.misreport_iter,
                                      lr=args.misreport_lr)

            allocs, payments = model(batch)
            truthful_util = utils.calc_agent_util(batch, allocs, payments)
            misreport_util = utils.tiled_misreport_util(
                misreport_batch, batch, model)
            # the clamp already makes regrets non-negative, so the second clamp is a no-op
            regrets = torch.clamp(misreport_util - truthful_util, min=0)
            positive_regrets = torch.clamp_min(regrets, 0)

            payment_loss = payments.sum(dim=1).mean() * payment_mult

            if epoch < args.rgt_start:
                regret_loss = 0
                regret_quad = 0
            else:
                regret_loss = (regret_mults * positive_regrets).mean()
                regret_quad = (rho / 2.0) * (positive_regrets**2).mean()

            # Add batch to epoch stats
            regrets_epoch = torch.cat((regrets_epoch, regrets), dim=0)
            payments_epoch = torch.cat((payments_epoch, payments), dim=0)

            # Calculate loss
            loss_func = regret_loss + regret_quad - payment_loss

            # update model
            optimizer.zero_grad()
            loss_func.backward()
            optimizer.step()

            # update various fancy multipliers
            # if epoch >= args.rgt_start:
            if iter % args.lagr_update_iter == 0:
                with torch.no_grad():
                    regret_mults += rho * positive_regrets.mean(dim=0)
            if iter % args.rho_incr_iter == 0:
                with torch.no_grad():
                    rho += args.rho_incr_amount
            if iter % decay_iter == 0:
                with torch.no_grad():
                    # decay sinkhorn_epsilon once every decay_iter iterations
                    model.sinkhorn_epsilon = model.sinkhorn_epsilon * decay_mult

        # Log training stats
        train_stats = {
            "regret_max": regrets_epoch.max().item(),
            "regret_mean": regrets_epoch.mean().item(),
            "regret_mults": regret_mults,
            "payment": payments_epoch.sum(dim=1).mean().item(),
        }

        # append metrics to lists
        mean_regrets.append(regrets_epoch.mean().item())
        mean_payments.append(payments_epoch.sum(dim=1).mean().item())
        # clone so the history is not overwritten by the in-place updates to regret_mults
        lagrange_mults.append(regret_mults.clone())
        print(train_stats)

    return mean_regrets, mean_payments, lagrange_mults
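
The only substantive difference from `train_loop` is the geometric decay of `model.sinkhorn_epsilon`, presumably the entropic-regularisation weight of the Sinkhorn step inside the model. Assuming epsilon is multiplied by `decay_mult` once every `decay_iter` iterations, its value after `n` iterations has the closed form sketched below (placeholder numbers for illustration).

def sinkhorn_epsilon_after(initial_epsilon, n_iters, decay_iter=1000, decay_mult=0.9):
    # epsilon is multiplied by decay_mult once every decay_iter iterations
    return initial_epsilon * decay_mult ** (n_iters // decay_iter)

print(sinkhorn_epsilon_after(1.0, 10_000))  # 0.9 ** 10 ≈ 0.349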