def main_exp3(principal, agents, oracle, resp_lst, curr_rep, T, num_agents, d):
    temp_regr = []
    algo_loss = []
    # resp_lst: |calA| x T
    for t in range(T):
        (a_t, arm_chosen) = principal.choose_action()
        resp = agents[t].response(a_t, d)
        principal.loss_func[t] = oracle.compute_loss(resp_lst, t)
        # importance-weighted (unbiased) estimate of the loss of the arm played
        estimated_loss = 1.0 * principal.loss_func[t][arm_chosen] / principal.pi[arm_chosen]
        principal.est_loss[arm_chosen] += estimated_loss
        # exponential-weights update on the cumulative estimated losses
        arr = np.array([(-principal.eta_exp3) * principal.est_loss[i]
                        for i in range(principal.calA_size)],
                       dtype=np.float128)
        principal.weights = np.exp(arr)
        principal.pi = [principal.weights[i] / sum(principal.weights)
                        for i in range(principal.calA_size)]
        # prevent division by almost 0
        for j in range(principal.calA_size):
            if (principal.pi[j] < 0.00000001):
                principal.pi[j] = 0.00000001
        algo_loss.append(principal.loss_func[t][arm_chosen])
        temp_regr.append(regret(principal.loss_func, principal.calA, algo_loss, t))
    return temp_regr

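# Hedged sketch (not part of the original code): the regret(...) helper called above is not
# shown in this excerpt. Assuming it returns the algorithm's cumulative loss minus the
# cumulative loss of the best fixed action in hindsight, a plausible version is:
def regret_principal_sketch(loss_func, calA, algo_loss, t):
    """Cumulative algorithm loss minus the loss of the best fixed action, up to round t."""
    best_fixed = min(sum(loss_func[s][i] for s in range(t + 1)) for i in range(len(calA)))
    return sum(algo_loss[:t + 1]) - best_fixed
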
def main_exp3(bidder, curr_rep, T, num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values, bids, threshold, noise):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = bidder.bidding()
        bids[t][0] = bid_chosen
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t], num_slots, num_bidders)
        allocated = gsp_instance.alloc_func(bidder.id, bids[t][bidder.id])
        clean_alloc[t] = [gsp_instance.alloc_func(bidder.id, bid * bidder.eps)
                          for bid in range(0, bidder.bid_space)]
        temp_alloc = deepcopy(clean_alloc[t])
        noise_cp = deepcopy(noise)
        # the bidder only observes a noisy version of his allocation curve
        bidder.alloc_func[t] = noise_mask(temp_alloc, noise_cp[t], ctr[t], num_slots)
        # reward function: value - payment (coming from the GSP module)
        bidder.pay_func[t] = [gsp_instance.pay_func(bidder.id, bid * bidder.eps)
                              for bid in range(0, bidder.bid_space)]
        if allocated > threshold[t]:
            bidder.reward_func[t] = [(values[t][0] - bidder.pay_func[t][b])
                                     for b in range(0, bidder.bid_space)]
        else:
            bidder.reward_func[t] = [0 for _ in range(0, bidder.bid_space)]
        bidder.utility[t] = normalize(bidder.reward_func[t], bidder.bid_space, 0, 1)

        # weights update
        arm_chosen = int(math.ceil(bids[t][0] / bidder.eps))
        if bidder.pi[arm_chosen] < 0.0000000001:
            bidder.pi[arm_chosen] = 0.0000000001
        estimated_loss = bidder.utility[t][arm_chosen] / bidder.pi[arm_chosen]
        bidder.loss[arm_chosen] += estimated_loss
        arr = np.array([(-bidder.eta_exp3) * bidder.loss[b]
                        for b in range(0, bidder.bid_space)],
                       dtype=np.float128)
        bidder.weights = np.exp(arr)
        bidder.pi = [bidder.weights[b] / sum(bidder.weights)
                     for b in range(0, bidder.bid_space)]

        # regret is computed against the clean (noiseless) allocation curve
        algo_util.append(bidder.reward_func[t][arm_chosen] * clean_alloc[t][arm_chosen])
        temp_regr.append(regret(bidder.reward_func, clean_alloc, bidder.bid_space, algo_util, t))
    return temp_regr

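# Hedged sketch (assumption, not the repository's implementation): normalize() above is used
# to rescale the per-bid reward vector into a target interval [lo, hi]. One minimal version
# consistent with that usage:
def normalize_sketch(rewards, bid_space, lo, hi):
    """Affinely rescale a length-bid_space reward vector into [lo, hi]."""
    r_min, r_max = min(rewards), max(rewards)
    if r_max == r_min:
        # degenerate case: all rewards are equal, so map everything to the lower endpoint
        return [lo for _ in range(bid_space)]
    return [lo + (hi - lo) * (rewards[b] - r_min) / (r_max - r_min) for b in range(bid_space)]
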
def main_winexp(bidder, curr_rep, T, num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values, bids, threshold, noise, num_adaptive):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    clean_pay = [[] for _ in range(0, T)]
    clean_reward = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = [bidder[i].bidding() for i in range(0, num_adaptive)]
        for i in range(0, num_adaptive):
            bids[t][i] = bid_chosen[i]
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t], num_slots, num_bidders)
        arm_chosen = [int(math.ceil(bids[t][i] / bidder[i].eps)) for i in range(0, num_adaptive)]
        for i in range(0, num_adaptive):
            allocated = gsp_instance.alloc_func(bidder[i].id, bids[t][bidder[i].id])
            temp = [gsp_instance.alloc_func(bidder[i].id, bid * bidder[i].eps)
                    for bid in range(0, bidder[i].bid_space)]
            if (i == 0):
                clean_alloc[t] = deepcopy(temp)
                clean_pay[t] = [gsp_instance.pay_func(bidder[i].id, bid * bidder[i].eps)
                                for bid in range(0, bidder[i].bid_space)]
            temp_pay = gsp_instance.pay_func(bidder[i].id, bid_vec[i])
            bidder[i].payment[t] = temp_pay
            # the bidder never sees the clean curves; his allocation/payment estimates are
            # built below from his own past (binary) win/lose feedback
            noise_cp = deepcopy(noise)
            bidder[i].currbid[t] = arm_chosen[i] * bidder[i].eps
            if allocated > threshold[t]:
                bidder[i].allocated[t] = 1
                bidder[i].alloc_func[t] = compute_allocation_function(bidder[i].currbid[:t + 1],
                                                                      bidder[i].allocated[:t + 1],
                                                                      bidder[i].bid_space, bidder[i].eps)
                bidder[i].alloc_func[t][arm_chosen[i]] = allocated
                bidder[i].pay_func[t] = compute_payment_function(bidder[i].currbid[:t + 1],
                                                                 bidder[i].payment[:t + 1],
                                                                 bidder[i].bid_space, bidder[i].eps)
                bidder[i].pay_func[t][arm_chosen[i]] = bidder[i].payment[t]
                temp_reward = [(values[t][0] - bidder[i].pay_func[t][b])
                               for b in range(0, bidder[i].bid_space)]
                if (i == 0):
                    clean_reward[t] = [(values[t][0] - clean_pay[t][b])
                                       for b in range(0, bidder[i].bid_space)]
                bidder[i].reward_func[t] = normalize(temp_reward, bidder[i].bid_space, -1, 1)
                bidder[i].utility[t] = bidder[i].compute_utility(1, bidder[i].reward_func[t],
                                                                 bidder[i].alloc_func[t])
            else:
                bidder[i].allocated[t] = 0
                bidder[i].alloc_func[t] = compute_allocation_function(bidder[i].currbid[:t + 1],
                                                                      bidder[i].allocated[:t + 1],
                                                                      bidder[i].bid_space, bidder[i].eps)
                bidder[i].alloc_func[t][arm_chosen[i]] = allocated
                bidder[i].payment[t] = 0
                bidder[i].pay_func[t] = [0] * bidder[i].bid_space
                temp_reward = [0 for _ in range(0, bidder[i].bid_space)]
                bidder[i].reward_func[t] = normalize(temp_reward, bidder[i].bid_space, -1, 1)
                if (i == 0):
                    clean_reward[t] = [0 for _ in range(0, bidder[i].bid_space)]
                bidder[i].utility[t] = bidder[i].compute_utility(0, bidder[i].reward_func[t],
                                                                 bidder[i].alloc_func[t])
            (bidder[i].weights, bidder[i].pi) = bidder[i].weights_update_winexp(bidder[i].eta_winexp,
                                                                                bidder[i].utility[t])
        # regret is measured for bidder 0 against the clean (noiseless) curves
        algo_util.append(clean_reward[t][arm_chosen[0]] * clean_alloc[t][arm_chosen[0]])
        temp_regr.append(regret(clean_reward, clean_alloc, bidder[0].bid_space, algo_util, t))
    return temp_regr

def main_winexp(bidder, curr_rep, T, num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values, bids, threshold, noise):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = bidder.bidding()
        bids[t][0] = bid_chosen
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t], num_slots, num_bidders)
        # this is not reported to the bidder, and thus is cleaned of noise
        allocated = gsp_instance.alloc_func(bidder.id, bids[t][bidder.id])
        clean_alloc[t] = [gsp_instance.alloc_func(bidder.id, bid * bidder.eps)
                          for bid in range(0, bidder.bid_space)]
        temp = deepcopy(clean_alloc[t])
        # bidder sees noisy data as his allocation
        noise_cp = deepcopy(noise)
        bidder.alloc_func[t] = noise_mask(temp, noise_cp[t], ctr[t], num_slots)
        # reward function: value - payment (coming from the GSP module)
        bidder.pay_func[t] = [gsp_instance.pay_func(bidder.id, bid * bidder.eps)
                              for bid in range(0, bidder.bid_space)]

        #### WIN-EXP computations ####
        # computation of reward will only be used for the regret
        if allocated > threshold[t]:
            bidder.reward_func[t] = [(values[t][0] - bidder.pay_func[t][b])
                                     for b in range(0, bidder.bid_space)]
            bidder.utility[t] = bidder.compute_utility(1, bidder.reward_func[t], bidder.alloc_func[t])
        else:
            bidder.reward_func[t] = [0 for _ in range(0, bidder.bid_space)]
            bidder.utility[t] = bidder.compute_utility(0, bidder.reward_func[t], bidder.alloc_func[t])
        (bidder.weights, bidder.pi) = bidder.weights_update_winexp(bidder.eta_winexp, bidder.utility[t])

        # for each auction (at the same t) you choose the same arm
        arm_chosen = int(math.ceil(bids[t][bidder.id] / bidder.eps))
        algo_util.append(bidder.reward_func[t][arm_chosen] * clean_alloc[t][arm_chosen])
        temp_regr.append(regret(bidder.reward_func, clean_alloc, bidder.bid_space, algo_util, t))
    return temp_regr

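# Hedged sketch (assumption): weights_update_winexp is a Bidder method that is not shown in
# this excerpt. From the way it is called, it presumably performs an exponential-weights step
# on the per-bid utility estimates, w_b <- w_b * exp(eta * u_t[b]), followed by normalization;
# the actual method may differ in details (e.g. how it clips tiny probabilities).
import numpy as np

def weights_update_winexp_sketch(weights, eta, utility):
    """One exponential-weights step; returns the new weights and the induced distribution."""
    new_w = np.asarray(weights, dtype=float) * np.exp(eta * np.asarray(utility, dtype=float))
    pi = (new_w / new_w.sum()).tolist()
    return (new_w.tolist(), pi)
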
def main_exp3(bidder, curr_rep, T, num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values, bids, threshold, noise, num_adaptive):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = [bidder[i].bidding() for i in range(0, num_adaptive)]
        for i in range(0, num_adaptive):
            bids[t][i] = bid_chosen[i]
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t], num_slots, num_bidders)
        arm_chosen = [0] * num_adaptive
        for i in range(0, num_adaptive):
            allocated = gsp_instance.alloc_func(bidder[i].id, bids[t][bidder[i].id])
            temp = [gsp_instance.alloc_func(bidder[i].id, bid * bidder[i].eps)
                    for bid in range(0, bidder[i].bid_space)]
            if (i == 0):
                clean_alloc[t] = deepcopy(temp)
            noise_cp = deepcopy(noise)
            bidder[i].alloc_func[t] = noise_mask(temp, noise_cp[t], ctr[t], num_slots)
            # reward function: value - payment (coming from the GSP module)
            bidder[i].pay_func[t] = [gsp_instance.pay_func(bidder[i].id, bid * bidder[i].eps)
                                     for bid in range(0, bidder[i].bid_space)]
            if (i == 0):
                # bidder 0 runs EXP3
                if allocated > threshold[t]:
                    bidder[i].reward_func[t] = [(values[t][i] - bidder[i].pay_func[t][b])
                                                for b in range(0, bidder[i].bid_space)]
                else:
                    bidder[i].reward_func[t] = [0 for _ in range(0, bidder[i].bid_space)]
                bidder[i].utility[t] = bidder[i].reward_func[t]

                # weights update
                arm_chosen[i] = int(math.ceil(bids[t][i] / bidder[i].eps))
                if bidder[i].pi[arm_chosen[i]] < 0.0000000001:
                    bidder[i].pi[arm_chosen[i]] = 0.0000000001
                estimated_loss = -bidder[i].utility[t][arm_chosen[i]] / bidder[i].pi[arm_chosen[i]]
                bidder[i].loss[arm_chosen[i]] += estimated_loss
                arr = np.array([(-bidder[i].eta_exp3) * bidder[i].loss[b]
                                for b in range(0, bidder[i].bid_space)],
                               dtype=np.float128)
                bidder[i].weights = np.exp(arr)
                bidder[i].pi = [bidder[i].weights[b] / sum(bidder[i].weights)
                                for b in range(0, bidder[i].bid_space)]
            else:
                # the remaining adaptive bidders run WIN-EXP
                if allocated > threshold[t]:
                    bidder[i].reward_func[t] = [(values[t][0] - bidder[i].pay_func[t][b])
                                                for b in range(0, bidder[i].bid_space)]
                    bidder[i].utility[t] = bidder[i].compute_utility(1, bidder[i].reward_func[t],
                                                                     bidder[i].alloc_func[t])
                else:
                    bidder[i].reward_func[t] = [0 for _ in range(0, bidder[i].bid_space)]
                    bidder[i].utility[t] = bidder[i].compute_utility(0, bidder[i].reward_func[t],
                                                                     bidder[i].alloc_func[t])
                (bidder[i].weights, bidder[i].pi) = bidder[i].weights_update_winexp(bidder[i].eta_winexp,
                                                                                    bidder[i].utility[t])
        algo_util.append(bidder[0].reward_func[t][arm_chosen[0]] * clean_alloc[t][arm_chosen[0]])
        temp_regr.append(regret(bidder[0].reward_func, clean_alloc, bidder[0].bid_space, algo_util, t))
    return temp_regr

def main_hedge(num_experts, outcomes, experts_reports, T, rep, sample_id):
    hedge_weights = [1.0] * num_experts
    hedge_probs = [1.0 / num_experts] * num_experts
    experts_loss_lst = [[0] * num_experts for _ in range(T)]
    hedge_loss = [0] * T
    avg_loss = [0] * T
    hedge_weighted_loss = [0] * T
    hedge_rep_regr = []
    eta = 1.0 / 4.0
    for t in range(T):
        print("Timestep t=%d for Hedge" % t)
        exp_chosen = draw(hedge_probs, 0, num_experts)
        experts_loss_lst[t] = [(outcomes[t] - experts_reports[i][t])**2
                               for i in range(num_experts)]
        hedge_loss[t] = experts_loss_lst[t][exp_chosen]
        # average report for this round
        avg_rep = 1.0 * sum([experts_reports[i][t] for i in range(num_experts)]) / num_experts
        hedge_weighted_rep = 1.0 * sum([hedge_probs[i] * experts_reports[i][t]
                                        for i in range(num_experts)])
        hedge_weighted_loss[t] = (outcomes[t] - hedge_weighted_rep)**2
        # weight update
        temp = [hedge_weights[i] * np.exp(-eta * experts_loss_lst[t][i])
                for i in range(num_experts)]
        hedge_weights = temp
        # probs update
        hedge_probs = [1.0 * hedge_weights[i] / sum(hedge_weights)
                       for i in range(num_experts)]
        (regr_best, bf) = regret(experts_loss_lst, num_experts, hedge_loss, t)
        hedge_rep_regr.append(regr_best)
    return (sample_id, num_experts, hedge_rep_regr,
            [sum(hedge_loss[:t + 1]) for t in range(T)],
            [sum(hedge_weighted_loss[:t + 1]) for t in range(T)])

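# Hedged sketch (assumption): draw(probs, gamma, n) is not defined in this excerpt. From its
# usage here and in the functions below, it plausibly samples an index from probs mixed with a
# gamma-fraction of the uniform distribution (gamma = 0 means sampling from probs directly).
import numpy as np

def draw_sketch(probs, gamma, n):
    """Sample an index in [0, n) from (1 - gamma) * probs + gamma * uniform."""
    mixed = np.array([(1.0 - gamma) * probs[i] + gamma / n for i in range(n)], dtype=float)
    return int(np.random.choice(n, p=mixed / mixed.sum()))
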
def main_wsux(num_experts, outcomes, experts_reports, T, rep, sample_id):
    wsux_probs = [1.0 / num_experts] * num_experts
    experts_loss_lst = [[0] * num_experts for _ in range(T)]
    wsux_loss = [0] * T
    wsux_weighted_loss = [0] * T
    wsux_rep_regr = []
    best_fixed_loss = []
    est_loss = [[0] * num_experts for _ in range(T)]
    eta = (1.0 * np.log(num_experts) / (4 * np.sqrt(num_experts) * T))**(2.0 / 3.0)
    gamma = np.sqrt(1.0 * eta * num_experts)
    for t in range(T):
        print("Timestep t=%d for WSU-UX" % t)
        exp_chosen = draw(wsux_probs, gamma, num_experts)
        experts_loss_lst[t] = [(outcomes[t] - experts_reports[i][t])**2
                               for i in range(num_experts)]
        wsux_loss[t] = experts_loss_lst[t][exp_chosen]
        est_loss[t][exp_chosen] = 1.0 * experts_loss_lst[t][exp_chosen] / wsux_probs[exp_chosen]
        # probs update through wswm
        cpy = deepcopy(wsux_probs)
        temp = wswm_compute(wsux_probs, experts_reports, num_experts, outcomes, t)
        wsux_probs = [eta * temp[i] + (1.0 - eta) * cpy[i] for i in range(num_experts)]
        (regr_best, best_fixed) = regret(experts_loss_lst, num_experts, wsux_loss, t)
        wsux_rep_regr.append(regr_best)
        best_fixed_loss.append(best_fixed)
    return (sample_id, num_experts, wsux_rep_regr,
            [sum(wsux_loss[:t + 1]) for t in range(T)],
            [sum(wsux_weighted_loss[:t + 1]) for t in range(T)],
            best_fixed_loss)

def main_elf(num_experts, outcomes, experts_reports, T, rep, sample_id):
    experts_loss_lst = [[] for _ in range(T)]
    elf_loss = [0] * T
    avg_loss = [0] * T
    elf_rep_regr = []
    best_fixed_loss = []
    wins_for_master_file = [[0 for _ in range(T)] for _ in range(num_experts)]
    elf_probs_lst = [[] for _ in range(T)]
    elf_probs_lst[0] = [1.0 / num_experts] * num_experts
    for t in range(T):
        print("Timestep t=%d for ELF." % t)
        exp_chosen_lst = draw_rec(elf_probs_lst, 0.0, t)
        experts_loss_lst[t] = [(outcomes[t] - experts_reports[i][t])**2
                               for i in range(num_experts)]
        # at the current timestep, choose the expert with the most wins
        (curr_exp_chosen, wins_lst) = most_wins(exp_chosen_lst, num_experts)
        elf_loss[t] = experts_loss_lst[t][curr_exp_chosen]
        wins_for_master_file[curr_exp_chosen][t] += 1
        # update the elf probs lst
        wagers = [1.0 / num_experts] * num_experts
        new_probs_lst = [a for a in wswm_compute(wagers, experts_reports, num_experts, outcomes, t)]
        elf_probs_lst[t] = new_probs_lst
        # regret computations
        (regr_best, best_fixed) = regret(experts_loss_lst, num_experts, elf_loss, t)
        elf_rep_regr.append(regr_best)
        best_fixed_loss.append(best_fixed)
    return (sample_id, num_experts, elf_rep_regr,
            [sum(elf_loss[:t + 1]) for t in range(T)],
            wins_for_master_file, best_fixed_loss)

def main_wswm(num_experts, outcomes, experts_reports, T, rep, sample_id):
    wswm_probs = [1.0 / num_experts] * num_experts
    experts_loss_lst = [[] for _ in range(T)]
    wswm_loss = [0] * T
    wswm_weighted_loss = [0] * T
    wswm_rep_regr = []
    best_fixed_loss = []
    eta = np.sqrt(1.0 * np.log(num_experts) / (1.0 * T))
    for t in range(T):
        print("Timestep t=%d for WSWM" % t)
        exp_chosen = draw(wswm_probs, 0, num_experts)
        experts_loss_lst[t] = [(outcomes[t] - experts_reports[i][t])**2
                               for i in range(num_experts)]
        wswm_loss[t] = experts_loss_lst[t][exp_chosen]
        # loss of <wswm weighted avg rep> - loss of <simple avg rep>
        wswm_weighted_rep = 1.0 * sum([wswm_probs[i] * experts_reports[i][t]
                                       for i in range(num_experts)])
        wswm_weighted_loss[t] = (outcomes[t] - wswm_weighted_rep)**2
        # probs update through wswm
        cpy = deepcopy(wswm_probs)
        temp = wswm_compute(wswm_probs, experts_reports, num_experts, outcomes, t)
        wswm_probs = [eta * temp[i] + (1.0 - eta) * cpy[i] for i in range(num_experts)]
        (regr_best, best_fixed) = regret(experts_loss_lst, num_experts, wswm_loss, t)
        wswm_rep_regr.append(regr_best)
        best_fixed_loss.append(best_fixed)
    return (sample_id, num_experts, wswm_rep_regr,
            [sum(wswm_loss[:t + 1]) for t in range(T)],
            [sum(wswm_weighted_loss[:t + 1]) for t in range(T)],
            best_fixed_loss)

def main_exp3(num_experts, outcomes, experts_reports, T, rep, sample_id):
    exp3_weights = [1.0] * num_experts
    exp3_probs = [1.0 / num_experts] * num_experts
    experts_loss_lst = [[0] * num_experts for _ in range(T)]
    est_loss = [[0] * num_experts for _ in range(T)]
    avg_loss = [0] * T
    exp3_loss = [0] * T
    exp3_rep_regr = []
    eta = np.sqrt(2 * np.log(num_experts) / (num_experts * T))
    for t in range(T):
        print("Timestep t=%d for EXP3" % t)
        exp_chosen = draw(exp3_probs, 0, num_experts)
        experts_loss_lst[t] = [(outcomes[t] - experts_reports[i][t])**2
                               for i in range(num_experts)]
        # unbiased estimator
        est_loss[t][exp_chosen] = 1.0 * experts_loss_lst[t][exp_chosen] / exp3_probs[exp_chosen]
        exp3_loss[t] = experts_loss_lst[t][exp_chosen]
        # weight update according to the estimated losses
        temp = [exp3_weights[i] * np.exp(-eta * est_loss[t][i])
                for i in range(num_experts)]
        exp3_weights = temp
        # probs update
        exp3_probs = [1.0 * exp3_weights[i] / sum(exp3_weights)
                      for i in range(num_experts)]
        (regr_best, bf) = regret(experts_loss_lst, num_experts, exp3_loss, t)
        exp3_rep_regr.append(regr_best)
    return (sample_id, num_experts, exp3_rep_regr,
            [sum(exp3_loss[:t + 1]) for t in range(T)])

def main_mwu(num_experts, outcomes, experts_reports, T, rep, sample_id):
    mwu_weights = [1.0] * num_experts
    mwu_probs = [1.0 / num_experts] * num_experts
    experts_loss_lst = [[0] * num_experts for _ in range(T)]
    mwu_loss = [0] * T
    avg_loss = [0] * T
    mwu_weighted_loss = [0] * T
    uniform_fixed_loss = [0] * T
    mwu_rep_regr = []
    eta = np.sqrt(1.0 * np.log(num_experts) / (1.0 * T))
    for t in range(T):
        print("Timestep t=%d for MWU" % t)
        exp_chosen = draw(mwu_probs, 0, num_experts)
        experts_loss_lst[t] = [(outcomes[t] - experts_reports[i][t])**2
                               for i in range(num_experts)]
        mwu_loss[t] = experts_loss_lst[t][exp_chosen]
        # average report for this round
        avg_rep = 1.0 * sum([experts_reports[i][t] for i in range(num_experts)]) / num_experts
        # loss of <mwu weighted avg rep> - loss of <simple avg rep>
        uniform_fixed_loss[t] = (outcomes[t] - avg_rep)**2
        mwu_weighted_rep = 1.0 * sum([mwu_probs[i] * experts_reports[i][t]
                                      for i in range(num_experts)])
        mwu_weighted_loss[t] = (outcomes[t] - mwu_weighted_rep)**2
        # weight update
        temp = [mwu_weights[i] * (1.0 - eta * experts_loss_lst[t][i])
                for i in range(num_experts)]
        mwu_weights = temp
        # probs update
        mwu_probs = [1.0 * mwu_weights[i] / sum(mwu_weights)
                     for i in range(num_experts)]
        (regr_best, bf) = regret(experts_loss_lst, num_experts, mwu_loss, t)
        mwu_rep_regr.append(regr_best)
    return (sample_id, num_experts, mwu_rep_regr,
            [sum(mwu_loss[:t + 1]) for t in range(T)],
            [sum(mwu_weighted_loss[:t + 1]) for t in range(T)],
            [sum(uniform_fixed_loss[:t + 1]) for t in range(T)])

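# Hedged sketch (assumption): the regret(...) helper used by the forecasting experiments above
# (Hedge, WSU-UX, ELF, WSWM, EXP3, MWU) is not shown. Given how its return value is unpacked,
# it plausibly returns the regret against the best fixed expert in hindsight together with
# that expert's cumulative loss:
def regret_experts_sketch(experts_loss_lst, num_experts, algo_loss, t):
    """Return (cumulative algorithm loss - best fixed expert loss, best fixed expert loss)."""
    cum_expert = [sum(experts_loss_lst[s][i] for s in range(t + 1)) for i in range(num_experts)]
    best_fixed = min(cum_expert)
    return (sum(algo_loss[:t + 1]) - best_fixed, best_fixed)
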
def main_winexp(bidder, curr_rep, T, num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values, bids, threshold, noise, num_adaptive):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = [bidder[i].bidding() for i in range(0, num_adaptive)]
        for i in range(0, num_adaptive):
            bids[t][i] = bid_chosen[i]
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t], num_slots, num_bidders)
        # this is not reported to the bidder, and thus is cleaned of noise
        for i in range(0, num_adaptive):
            allocated = gsp_instance.alloc_func(bidder[i].id, bids[t][bidder[i].id])
            temp = [gsp_instance.alloc_func(bidder[i].id, bid * bidder[i].eps)
                    for bid in range(0, bidder[i].bid_space)]
            if (i == 0):
                clean_alloc[t] = deepcopy(temp)
            # bidder sees noisy data as his allocation
            noise_cp = deepcopy(noise)
            bidder[i].alloc_func[t] = noise_mask(temp, noise_cp[t], ctr[t], num_slots)
            # reward function: value - payment (coming from the GSP module)
            bidder[i].pay_func[t] = [gsp_instance.pay_func(bidder[i].id, bid * bidder[i].eps)
                                     for bid in range(0, bidder[i].bid_space)]

            #### WIN-EXP computations ####
            # computation of reward will only be used for the regret
            if (i == 0):
                # bidder 0 runs WIN-EXP
                if allocated > threshold[t]:
                    bidder[i].reward_func[t] = [(values[t][0] - bidder[i].pay_func[t][b])
                                                for b in range(0, bidder[i].bid_space)]
                    bidder[i].utility[t] = bidder[i].compute_utility(1, bidder[i].reward_func[t],
                                                                     bidder[i].alloc_func[t])
                else:
                    bidder[i].reward_func[t] = [0 for _ in range(0, bidder[i].bid_space)]
                    bidder[i].utility[t] = bidder[i].compute_utility(0, bidder[i].reward_func[t],
                                                                     bidder[i].alloc_func[t])
                (bidder[i].weights, bidder[i].pi) = bidder[i].weights_update_winexp(bidder[i].eta_winexp,
                                                                                    bidder[i].utility[t])
            else:
                # the remaining adaptive bidders run EXP3
                if allocated > threshold[t]:
                    bidder[i].reward_func[t] = [(values[t][i] - bidder[i].pay_func[t][b])
                                                for b in range(0, bidder[i].bid_space)]
                else:
                    bidder[i].reward_func[t] = [0 for _ in range(0, bidder[i].bid_space)]
                bidder[i].utility[t] = bidder[i].reward_func[t]

                # weights update
                arm_chosen = int(math.ceil(bids[t][i] / bidder[i].eps))
                if bidder[i].pi[arm_chosen] < np.exp(-700):
                    bidder[i].pi[arm_chosen] = np.exp(-700)
                estimated_loss = -bidder[i].utility[t][arm_chosen] / bidder[i].pi[arm_chosen]
                bidder[i].loss[arm_chosen] += estimated_loss
                arr = np.array([(-bidder[i].eta_exp3) * bidder[i].loss[b]
                                for b in range(0, bidder[i].bid_space)],
                               dtype=np.float128)
                bidder[i].weights = np.exp(arr)
                bidder[i].pi = [bidder[i].weights[b] / sum(bidder[i].weights)
                                for b in range(0, bidder[i].bid_space)]
        # for each auction (at the same t) you choose the same arm
        arm = int(math.ceil(bids[t][0] / bidder[0].eps))
        algo_util.append(bidder[0].reward_func[t][arm] * clean_alloc[t][arm])
        temp_regr.append(regret(bidder[0].reward_func, clean_alloc, bidder[0].bid_space, algo_util, t))
    return temp_regr

def main_gexp3(bidder, curr_rep, T, num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values, bids, num_auctions):
    algo_util = []
    temp_regr = []
    gamma = 1.05 * math.sqrt(bidder.bid_space * math.log(bidder.bid_space, 2) / T)
    for t in range(0, T):
        #bid_chosen = round(bidder.gbidding(bidder.weights, gamma), 2)
        bid_chosen = round(bidder.bidding(), 2)
        # the bid chosen by the learner is the same for all auctions in the batch
        for auction in range(0, num_auctions):
            bids[auction][t][0] = bid_chosen
            allocated = GSP(ctr[auction][t], reserve[auction][t], bids[auction][t],
                            rank_scores[auction][t], num_slots,
                            num_bidders).alloc_func(bidder.id, bids[auction][t][bidder.id])
            bidder.alloc_func[auction][t] = [
                GSP(ctr[auction][t], reserve[auction][t], bids[auction][t],
                    rank_scores[auction][t], num_slots,
                    num_bidders).alloc_func(bidder.id, bid * bidder.eps)
                for bid in range(0, bidder.bid_space)
            ]
            # reward function: value - payment (coming from the GSP module)
            bid_vec = deepcopy(bids[auction][t])
            bidder.pay_func[t] = [
                GSP(ctr[auction][t], reserve[auction][t], bid_vec,
                    rank_scores[auction][t], num_slots,
                    num_bidders).pay_func(bidder.id, bid * bidder.eps)
                for bid in range(0, bidder.bid_space)
            ]
            bidder.reward_func[auction][t] = compute_reward(bidder.alloc_func[auction][t],
                                                            bidder.pay_func[t],
                                                            ctr[auction][t],
                                                            values[auction][t])

        #### EXP3 computations ####
        # per-bid utility averaged over the batch of auctions at round t (shifted by -1)
        u_s = [0 for _ in range(0, bidder.bid_space)]
        for b in range(0, bidder.bid_space):
            for auction in range(0, num_auctions):
                u_s[b] += bidder.reward_func[auction][t][b] * bidder.alloc_func[auction][t][b]
            bidder.avg_utility[t].append(u_s[b] / num_auctions - 1)

        # weights update
        arm_chosen = int(math.ceil(bids[0][t][0] / bidder.eps))
        if bidder.pi[arm_chosen] < 0.0000000001:
            bidder.pi[arm_chosen] = 0.0000000001
        #estimated_loss = bidder.avg_utility[t][arm_chosen] / bidder.pi[arm_chosen]
        estimated_loss = (bidder.avg_utility[t][arm_chosen] - bidder.beta) / bidder.pi[arm_chosen]
        bidder.loss[arm_chosen] += estimated_loss
        bidder.weights = [math.exp(-bidder.eta_gexp3 * bidder.loss[b])
                          for b in range(0, bidder.bid_space)]
        # mix the exponential weights with uniform exploration (gamma)
        bidder.pi = [(1 - gamma) * bidder.weights[b] / sum(bidder.weights) + gamma / bidder.bid_space
                     for b in range(0, bidder.bid_space)]

        # compute the algorithm's utility at every step
        algo_util.append(bidder.avg_utility[t][int(math.ceil(bids[0][t][0] / bidder.eps))])
        temp_regr.append(regret(0, bidder.reward_func, bidder.alloc_func, bidder.bid_space,
                                algo_util, t, num_auctions))
    return temp_regr

def main_dgrind(regress, principal, agents, oracle, resp_lst, curr_rep, T, num_agents, d):
    temp_regr = []
    algo_loss = []
    actions_taken = []
    updated = []
    updated_curr = [0] * principal.calA_size
    print("runner dgrind repetition: %d" % curr_rep)
    for t in range(T):
        print("Timestep t=%d" % t)
        cp_probs = deepcopy(principal.pi)
        (a_t, arm_chosen) = principal.choose_action()
        resp = agents[t].response(a_t, d)
        principal.loss_func[t] = oracle.compute_loss(resp_lst, t)
        if (not regress):
            in_probs = oracle.compute_in_probs(cp_probs, d, resp_lst, t)
        else:
            # no omnipotent oracle, only a regression one is available
            actions_taken.append(principal.calA[arm_chosen])  # list containing all actions taken by the principal
            updated_curr[arm_chosen] = 1  # flag for the current arm
            tot = 0.0
            incl = [0] * principal.calA_size  # unclear what it does at the moment
            for i in range(principal.calA_size):  # iterate over all of the principal's actions
                a = principal.calA[i]
                dist = 1.0 * np.dot(a, resp) / np.linalg.norm(a[:d])  # distance of the current point from action a
                if (i != arm_chosen):
                    if (np.abs(dist) >= 2 * agents[t].delta):
                        updated_curr[i] = 1
                        tot += principal.pi[i]  # builds the dataset for the logistic regression
                        incl[i] = 1
                    else:
                        updated_curr[i] = 0
                else:
                    if np.abs(dist) >= 2 * agents[t].delta:
                        tot += principal.pi[i]
                        incl[i] = 1
            updated.append(updated_curr)
            in_probs = oracle.compute_in_probs_regr(cp_probs, d, t, updated, actions_taken, tot, incl)
        estimated_loss = (1.0 * principal.loss_func[t][arm_chosen]) / in_probs[arm_chosen]
        principal.est_loss[arm_chosen] += estimated_loss
        for i in range(principal.calA_size):
            a = principal.calA[i]
            if (i != arm_chosen):
                dist = 1.0 * np.dot(a, resp) / np.linalg.norm(a[:d])
                if np.abs(dist) >= 2 * agents[t].delta:
                    if (np.sign(dist * agents[t].label) == -1):
                        principal.est_loss[i] += (1.0) / in_probs[i]
                    else:
                        principal.est_loss[i] += (0.0) / in_probs[i]
        # eta depends on whether we have access to the omnipotent oracle or only the regression one
        if (not regress):
            eta = principal.eta_dgrind
        else:
            eta = principal.eta_dgrind_regress
        arr = np.array([(-eta) * principal.est_loss[i] for i in range(principal.calA_size)],
                       dtype=np.float128)
        principal.weights = np.exp(arr)
        principal.pi = [principal.weights[i] / sum(principal.weights)
                        for i in range(principal.calA_size)]
        # prevent division by almost 0
        for j in range(principal.calA_size):
            if (principal.pi[j] < 0.00000001):
                principal.pi[j] = 0.00000001
        # cumulative loss for the actions played
        algo_loss.append(principal.loss_func[t][arm_chosen])
        temp_regr.append(regret(principal.loss_func, principal.calA, algo_loss, t))
    return temp_regr