import torch
from torch import optim
from tqdm import tqdm

import utils


def test_loop(model, loader, args, device='cpu'):
    # regrets and payments are 2d: n_samples x n_agents.
    test_regrets = torch.Tensor().to(device)
    test_payments = torch.Tensor().to(device)

    for i, batch in tqdm(enumerate(loader)):
        batch = batch.to(device)
        misreport_batch = batch.clone().detach()
        utils.optimize_misreports(model, batch, misreport_batch,
                                  misreport_iter=args.test_misreport_iter,
                                  lr=args.misreport_lr)

        allocs, payments = model(batch)
        truthful_util = utils.calc_agent_util(batch, allocs, payments)
        misreport_util = utils.tiled_misreport_util(misreport_batch, batch, model)

        regrets = misreport_util - truthful_util
        positive_regrets = torch.clamp_min(regrets, 0)

        # Record entire test data
        test_regrets = torch.cat((test_regrets, positive_regrets), dim=0)
        test_payments = torch.cat((test_payments, payments), dim=0)

    result = {
        "payment_mean": test_payments.sum(dim=1).mean(dim=0).item(),
        "regret_mean": test_regrets.sum(dim=1).mean(dim=0).item(),
        "regret_max": test_regrets.sum(dim=1).max().item(),
    }
    return result
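# For readers: `utils.optimize_misreports` is not defined in this file. A
# minimal sketch of what such an inner loop is assumed to do -- gradient
# ascent on each agent's utility at the misreported bids -- is given below.
# This is documentation only, not the actual utils implementation; the clamp
# range assumes valuations normalized to [0, 1].
def _optimize_misreports_sketch(model, truthful_batch, misreport_batch,
                                misreport_iter=25, lr=0.1):
    for _ in range(misreport_iter):
        misreport_batch.requires_grad_(True)
        # Utility each agent would get by deviating to misreport_batch while
        # the others report truthfully (same helper the loops here use).
        util = utils.tiled_misreport_util(misreport_batch, truthful_batch, model)
        (grad,) = torch.autograd.grad(util.sum(), misreport_batch)
        with torch.no_grad():
            misreport_batch += lr * grad   # ascent step on misreport utility
            misreport_batch.clamp_(0, 1)   # keep bids in the valid range
        misreport_batch.requires_grad_(False)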
def train_loop_no_lagrange(model, train_loader, args, device='cpu'):
    payment_mult = 1
    optimizer = optim.Adam(model.parameters(), lr=args.model_lr)

    for epoch in tqdm(range(args.num_epochs)):
        regrets_epoch = torch.Tensor().to(device)
        payments_epoch = torch.Tensor().to(device)

        for i, batch in enumerate(train_loader):
            batch = batch.to(device)
            misreport_batch = batch.clone().detach().to(device)
            utils.optimize_misreports(model, batch, misreport_batch,
                                      misreport_iter=args.misreport_iter,
                                      lr=args.misreport_lr)

            allocs, payments = model(batch)
            truthful_util = utils.calc_agent_util(batch, allocs, payments)
            misreport_util = utils.tiled_misreport_util(misreport_batch, batch, model)

            regrets = misreport_util - truthful_util
            positive_regrets = torch.clamp_min(regrets, 0)
            payment_loss = payments.sum(dim=1).mean() * payment_mult

            # Fixed (non-Lagrangian) regret penalty, enabled after a warm-up.
            if epoch < args.rgt_start:
                regret_loss = 0
            else:
                regret_loss = torch.sqrt(positive_regrets.mean()) + positive_regrets.mean()

            # Add batch to epoch stats
            regrets_epoch = torch.cat((regrets_epoch, regrets), dim=0)
            payments_epoch = torch.cat((payments_epoch, payments), dim=0)

            # Maximize payment, penalize regret.
            loss_func = regret_loss - payment_loss

            # Update model
            optimizer.zero_grad()
            loss_func.backward()
            optimizer.step()

        # Log training stats
        train_stats = {
            "regret_max": regrets_epoch.max().item(),
            "regret_mean": regrets_epoch.mean().item(),
            "payment": payments_epoch.sum(dim=1).mean().item(),
        }
        print(train_stats)
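# Example invocation (illustrative): the loop above reads its hyperparameters
# from an argparse-style namespace. The field names below are exactly the
# attributes accessed in this file; the values are placeholder guesses, not
# tuned settings.
#
#     from argparse import Namespace
#     args = Namespace(model_lr=1e-3, num_epochs=80, rgt_start=0,
#                      misreport_iter=25, misreport_lr=0.1)
#     train_loop_no_lagrange(model, train_loader, args, device='cuda')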
def train_loop(model, train_loader, args, device="cpu"):
    regret_mults = 5.0 * torch.ones((1, model.n_agents)).to(device)
    payment_mult = 1
    optimizer = optim.Adam(model.parameters(), lr=args.model_lr)
    iteration = 0
    rho = args.rho

    for epoch in tqdm(range(args.num_epochs)):
        regrets_epoch = torch.Tensor().to(device)
        payments_epoch = torch.Tensor().to(device)

        for i, batch in enumerate(train_loader):
            iteration += 1
            batch = batch.to(device)
            misreport_batch = batch.clone().detach().to(device)
            utils.optimize_misreports(model, batch, misreport_batch,
                                      misreport_iter=args.misreport_iter,
                                      lr=args.misreport_lr)

            allocs, payments = model(batch)
            truthful_util = utils.calc_agent_util(batch, allocs, payments)
            misreport_util = utils.tiled_misreport_util(misreport_batch, batch, model)

            regrets = misreport_util - truthful_util
            positive_regrets = torch.clamp_min(regrets, 0)
            payment_loss = payments.sum(dim=1).mean() * payment_mult

            # Augmented Lagrangian terms on the regret constraint, enabled
            # after a warm-up period.
            if epoch < args.rgt_start:
                regret_loss = 0
                regret_quad = 0
            else:
                regret_loss = (regret_mults * positive_regrets).mean()
                regret_quad = (rho / 2.0) * (positive_regrets ** 2).mean()

            # Add batch to epoch stats
            regrets_epoch = torch.cat((regrets_epoch, regrets), dim=0)
            payments_epoch = torch.cat((payments_epoch, payments), dim=0)

            # Calculate loss
            loss_func = regret_loss + regret_quad - payment_loss

            # Update model
            optimizer.zero_grad()
            loss_func.backward()
            optimizer.step()

            # Update Lagrange multipliers and rho on their schedules.
            if iteration % args.lagr_update_iter == 0:
                with torch.no_grad():
                    regret_mults += rho * positive_regrets.mean(dim=0)
            if iteration % args.rho_incr_iter == 0:
                rho += args.rho_incr_amount

        # Log training stats
        train_stats = {
            "regret_max": regrets_epoch.max().item(),
            "regret_mean": regrets_epoch.mean().item(),
            "payment": payments_epoch.sum(dim=1).mean().item(),
        }
        print(train_stats)

        mult_stats = {
            "regret_mult": regret_mults.mean().item(),
            "payment_mult": payment_mult,
        }
        print(mult_stats)
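# The multiplier updates above follow the augmented Lagrangian scheme: the
# penalty weights regret_mults grow in proportion to the observed constraint
# violation, and rho tightens the quadratic term on a fixed schedule. Below is
# a self-contained toy version of the same scheme (unrelated to auctions),
# minimizing f(x) = (x - 2)^2 subject to x <= 1; all names and settings here
# are illustrative.
def _augmented_lagrangian_toy(steps=2000, lr=0.05, lagr_update_iter=100,
                              rho=1.0, rho_incr_iter=500, rho_incr_amount=1.0):
    x = torch.zeros(1, requires_grad=True)
    lam = torch.zeros(1)   # Lagrange multiplier, analogous to regret_mults
    opt = optim.Adam([x], lr=lr)
    for it in range(1, steps + 1):
        violation = torch.clamp_min(x - 1.0, 0)   # g(x) = x - 1 <= 0
        # Linear multiplier term plus quadratic penalty, as in train_loop.
        loss = (x - 2.0) ** 2 + lam * violation + (rho / 2.0) * violation ** 2
        opt.zero_grad()
        loss.backward()
        opt.step()
        if it % lagr_update_iter == 0:
            with torch.no_grad():
                # Same update rule as regret_mults += rho * mean violation.
                lam += rho * torch.clamp_min(x - 1.0, 0)
        if it % rho_incr_iter == 0:
            rho += rho_incr_amount
    return x.item(), lam.item()   # x approaches the constrained optimum 1.0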
def train_loop_sinkhorn_decay(
    model,
    train_loader,
    args,
    device='cpu',
    decay_iter=1000,
    decay_mult=0.9,
):
    regret_mults = 5.0 * torch.ones((1, model.n_agents)).to(device)
    payment_mult = 1
    optimizer = optim.Adam(model.parameters(), lr=args.model_lr)
    iteration = 0
    rho = args.rho

    mean_regrets = []
    mean_payments = []
    lagrange_mults = []

    for epoch in tqdm(range(args.num_epochs)):
        regrets_epoch = torch.Tensor().to(device)
        payments_epoch = torch.Tensor().to(device)

        for i, batch in enumerate(train_loader):
            iteration += 1
            batch = batch.to(device)
            misreport_batch = batch.clone().detach().to(device)
            utils.optimize_misreports(model, batch, misreport_batch,
                                      misreport_iter=args.misreport_iter,
                                      lr=args.misreport_lr)

            allocs, payments = model(batch)
            truthful_util = utils.calc_agent_util(batch, allocs, payments)
            misreport_util = utils.tiled_misreport_util(misreport_batch, batch, model)

            # Unlike the loops above, regrets are clamped at zero before logging.
            regrets = torch.clamp_min(misreport_util - truthful_util, 0)
            positive_regrets = regrets
            payment_loss = payments.sum(dim=1).mean() * payment_mult

            if epoch < args.rgt_start:
                regret_loss = 0
                regret_quad = 0
            else:
                regret_loss = (regret_mults * positive_regrets).mean()
                regret_quad = (rho / 2.0) * (positive_regrets ** 2).mean()

            # Add batch to epoch stats
            regrets_epoch = torch.cat((regrets_epoch, regrets), dim=0)
            payments_epoch = torch.cat((payments_epoch, payments), dim=0)

            # Calculate loss
            loss_func = regret_loss + regret_quad - payment_loss

            # Update model
            optimizer.zero_grad()
            loss_func.backward()
            optimizer.step()

            # Update Lagrange multipliers and rho on their schedules.
            if iteration % args.lagr_update_iter == 0:
                with torch.no_grad():
                    regret_mults += rho * positive_regrets.mean(dim=0)
            if iteration % args.rho_incr_iter == 0:
                rho += args.rho_incr_amount
            # Anneal the Sinkhorn temperature every decay_iter iterations.
            if iteration % decay_iter == 0:
                with torch.no_grad():
                    model.sinkhorn_epsilon = model.sinkhorn_epsilon * decay_mult

        # Log training stats
        train_stats = {
            "regret_max": regrets_epoch.max().item(),
            "regret_mean": regrets_epoch.mean().item(),
            "regret_mults": regret_mults,
            "payment": payments_epoch.sum(dim=1).mean().item(),
        }

        # Append epoch metrics to histories; clone the multipliers, since
        # regret_mults is mutated in place across epochs.
        mean_regrets.append(regrets_epoch.mean().item())
        mean_payments.append(payments_epoch.sum(dim=1).mean().item())
        lagrange_mults.append(regret_mults.clone())
        print(train_stats)

    return mean_regrets, mean_payments, lagrange_mults
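# Example use (illustrative): this loop additionally assumes the model exposes
# a mutable `sinkhorn_epsilon` attribute (e.g. a Sinkhorn-normalized allocation
# head whose temperature is annealed during training). It returns per-epoch
# histories that can be inspected or plotted after training.
#
#     regrets, payments, mults = train_loop_sinkhorn_decay(
#         model, train_loader, args, device='cuda',
#         decay_iter=1000, decay_mult=0.9)
#     print(regrets[-1], payments[-1], mults[-1])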