def main_winexp(bidder, curr_rep, T, num_bidders, num_slots, outcome_space,
                rank_scores, ctr, reserve, values, bids, threshold, noise,
                num_adaptive):
    """Simulate T GSP rounds in which `num_adaptive` bidders learn via WIN-EXP.

    Each round every adaptive bidder draws a bid, one GSP auction is built
    from the resulting bid vector, and each bidder rebuilds its allocation
    and payment curves from its own bid/outcome history before calling
    `weights_update_winexp`. Regret is tracked for bidder 0 only, against
    the noise-free allocation and payment curves of the auction.

    Args:
        bidder: Indexable collection of bidder objects; entries
            `0..num_adaptive-1` are used and mutated in place.
        curr_rep: Not used by this function.
        T: Number of rounds to simulate.
        num_bidders: Forwarded to `GSP`.
        num_slots: Forwarded to `GSP`.
        outcome_space: Not used by this function.
        rank_scores: Per-round rank scores; `rank_scores[t]` goes to `GSP`.
        ctr: Per-round click-through rates; `ctr[t]` goes to `GSP`.
        reserve: Per-round reserve; `reserve[t]` goes to `GSP`.
        values: `values[t][0]` is used as the value in the reward.
        bids: `bids[t][i]` is overwritten with the bid drawn by bidder i.
        threshold: `threshold[t]` is the allocation level above which a
            bidder is treated as allocated in round t.
        noise: Not used by this function.
        num_adaptive: Number of learning bidders.

    Returns:
        A list with one entry per round: the result of `regret(...)` for
        bidder 0 after that round.
    """
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(T)]
    clean_pay = [[] for _ in range(T)]
    clean_reward = [[] for _ in range(T)]

    for t in range(T):
        # All bidders commit to a bid before any of them is written back.
        bid_chosen = [bidder[i].bidding() for i in range(num_adaptive)]
        for i in range(num_adaptive):
            bids[t][i] = bid_chosen[i]
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t],
                           num_slots, num_bidders)

        # Index of each bidder's bid on its own eps-grid.
        arm_chosen = [int(math.ceil(bids[t][i] / bidder[i].eps))
                      for i in range(num_adaptive)]

        for i in range(num_adaptive):
            me = bidder[i]
            arm = arm_chosen[i]

            allocated = gsp_instance.alloc_func(me.id, bids[t][me.id])
            temp = [gsp_instance.alloc_func(me.id, bid * me.eps)
                    for bid in range(me.bid_space)]
            if i == 0:
                # Ground-truth curves, kept only for the regret of bidder 0.
                clean_alloc[t] = deepcopy(temp)
                clean_pay[t] = [gsp_instance.pay_func(me.id, bid * me.eps)
                                for bid in range(me.bid_space)]

            me.payment[t] = gsp_instance.pay_func(me.id, bid_vec[i])
            me.currbid[t] = arm * me.eps

            # The allocation curve is re-estimated from the bidder's own
            # history, then pinned to the realised value at the played arm.
            won = allocated > threshold[t]
            me.allocated[t] = 1 if won else 0
            me.alloc_func[t] = compute_allocation_function(
                me.currbid[:t + 1], me.allocated[:t + 1],
                me.bid_space, me.eps)
            me.alloc_func[t][arm] = allocated

            if won:
                me.pay_func[t] = compute_payment_function(
                    me.currbid[:t + 1], me.payment[:t + 1],
                    me.bid_space, me.eps)
                me.pay_func[t][arm] = me.payment[t]
                # NOTE(review): every bidder uses values[t][0] here, not
                # values[t][i] -- confirm this is intended.
                temp_reward = [values[t][0] - me.pay_func[t][b]
                               for b in range(me.bid_space)]
                if i == 0:
                    clean_reward[t] = [values[t][0] - clean_pay[t][b]
                                       for b in range(me.bid_space)]
                me.reward_func[t] = normalize(temp_reward, me.bid_space, -1, 1)
                me.utility[t] = me.compute_utility(
                    1, me.reward_func[t], me.alloc_func[t])
            else:
                # Not allocated: no payment and an all-zero reward curve.
                me.payment[t] = 0
                me.pay_func[t] = [0] * me.bid_space
                temp_reward = [0 for _ in range(me.bid_space)]
                me.reward_func[t] = normalize(temp_reward, me.bid_space, -1, 1)
                if i == 0:
                    clean_reward[t] = [0 for _ in range(me.bid_space)]
                me.utility[t] = me.compute_utility(
                    0, me.reward_func[t], me.alloc_func[t])

            me.weights, me.pi = me.weights_update_winexp(
                me.eta_winexp, me.utility[t])

        lead_arm = arm_chosen[0]
        algo_util.append(clean_reward[t][lead_arm] * clean_alloc[t][lead_arm])
        temp_regr.append(regret(clean_reward, clean_alloc,
                                bidder[0].bid_space, algo_util, t))

    return temp_regr
def main_exp3(bidder, curr_rep, T, num_bidders, num_slots, outcome_space,
              rank_scores, ctr, reserve, values, bids, threshold, noise):
    """Simulate T GSP rounds in which a single bidder learns via EXP3.

    Each round the bidder draws a bid, the auction is evaluated on the
    bidder's whole bid grid, and only the played arm's loss is updated with
    an importance-weighted estimate before the distribution is recomputed.

    Args:
        bidder: Bidder object; mutated in place (`currbid`, `payment`,
            `pay_func`, `alloc_func`, `reward_func`, `utility`, `loss`,
            `weights`, `pi`).
        curr_rep: Not used by this function.
        T: Number of rounds to simulate.
        num_bidders: Forwarded to `GSP`.
        num_slots: Forwarded to `GSP`.
        outcome_space: Not used by this function.
        rank_scores: Per-round rank scores; `rank_scores[t]` goes to `GSP`.
        ctr: Per-round click-through rates; `ctr[t]` goes to `GSP`.
        reserve: Per-round reserve; `reserve[t]` goes to `GSP`.
        values: `values[t][0]` is used as the bidder's value.
        bids: `bids[t][0]` is overwritten with the drawn bid.
        threshold: `threshold[t]` is the allocation level above which the
            bidder is treated as allocated in round t.
        noise: Not used by this function.

    Returns:
        A list with one entry per round: the result of `regret(...)` after
        that round.
    """
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(T)]

    for t in range(T):
        bid_chosen = bidder.bidding()
        bids[t][0] = bid_chosen
        bid_vec = deepcopy(bids[t])
        bidder.currbid[t] = bid_chosen

        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t],
                           num_slots, num_bidders)

        allocated = gsp_instance.alloc_func(bidder.id, bidder.currbid[t])
        # Only this round's bid is needed, so read it directly rather than
        # deep-copying the whole bid history every round.
        bidder.payment[t] = gsp_instance.pay_func(bidder.id, bidder.currbid[t])

        clean_alloc[t] = [gsp_instance.alloc_func(bidder.id, b * bidder.eps)
                          for b in range(bidder.bid_space)]
        bidder.pay_func[t] = [gsp_instance.pay_func(bidder.id, b * bidder.eps)
                              for b in range(bidder.bid_space)]
        bidder.alloc_func[t] = deepcopy(clean_alloc[t])

        arm_chosen = int(math.ceil(bids[t][0] / bidder.eps))

        # Reward function: value - payment (payment comes from the GSP module).
        if allocated > threshold[t]:
            bidder.reward_func[t] = [values[t][0] - bidder.pay_func[t][b]
                                     for b in range(bidder.bid_space)]
        else:
            bidder.reward_func[t] = [0 for _ in range(bidder.bid_space)]
        bidder.utility[t] = bidder.reward_func[t]

        # Weights update. The played arm's probability is floored so the
        # importance-weighted estimate below cannot divide by ~0.
        if bidder.pi[arm_chosen] < 0.0000000001:
            bidder.pi[arm_chosen] = 0.0000000001
        estimated_loss = -bidder.utility[t][arm_chosen] / bidder.pi[arm_chosen]
        bidder.loss[arm_chosen] += estimated_loss

        arr = np.array([(-bidder.eta_exp3) * bidder.loss[b]
                        for b in range(bidder.bid_space)],
                       dtype=np.float128)
        bidder.weights = np.exp(arr)
        # The normaliser is the same for every arm: compute it once.
        total_weight = sum(bidder.weights)
        bidder.pi = [bidder.weights[b] / total_weight
                     for b in range(bidder.bid_space)]

        algo_util.append(
            bidder.reward_func[t][arm_chosen] * clean_alloc[t][arm_chosen])
        temp_regr.append(regret(bidder.reward_func, clean_alloc,
                                bidder.bid_space, algo_util, t))

    return temp_regr
def main_winexp(bidder, curr_rep, T, num_bidders, num_slots, outcome_space,
                rank_scores, ctr, reserve, values, bids, threshold, noise):
    """Simulate T GSP rounds in which a single bidder learns via WIN-EXP.

    Each round the bidder draws a bid, the auction is evaluated on the
    bidder's whole bid grid, the bidder is shown a noise-masked copy of the
    allocation curve, and its weights are updated through
    `weights_update_winexp`. Regret is computed against the noise-free
    allocation curve.

    Args:
        bidder: Bidder object; mutated in place (`alloc_func`, `pay_func`,
            `reward_func`, `utility`, `weights`, `pi`).
        curr_rep: Not used by this function.
        T: Number of rounds to simulate.
        num_bidders: Forwarded to `GSP`.
        num_slots: Forwarded to `GSP` and `noise_mask`.
        outcome_space: Not used by this function.
        rank_scores: Per-round rank scores; `rank_scores[t]` goes to `GSP`.
        ctr: Per-round click-through rates; `ctr[t]` goes to `GSP` and
            `noise_mask`.
        reserve: Per-round reserve; `reserve[t]` goes to `GSP`.
        values: `values[t][0]` is used as the bidder's value.
        bids: `bids[t][0]` is overwritten with the drawn bid.
        threshold: `threshold[t]` is the allocation level above which the
            bidder is treated as allocated in round t.
        noise: Per-round noise; a copy of `noise[t]` goes to `noise_mask`.

    Returns:
        A list with one entry per round: the result of `regret(...)` after
        that round.
    """
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(T)]

    for t in range(T):
        bid_chosen = bidder.bidding()
        bids[t][0] = bid_chosen
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t],
                           num_slots, num_bidders)

        # This is not reported to the bidder, and thus is free of noise.
        allocated = gsp_instance.alloc_func(bidder.id, bids[t][bidder.id])
        clean_alloc[t] = [gsp_instance.alloc_func(bidder.id, bid * bidder.eps)
                          for bid in range(bidder.bid_space)]

        # The bidder sees noisy data as its allocation. noise_mask receives
        # private copies so the clean curve and the caller's noise stay
        # untouched; only this round's noise entry needs copying.
        temp = deepcopy(clean_alloc[t])
        bidder.alloc_func[t] = noise_mask(temp, deepcopy(noise[t]),
                                          ctr[t], num_slots)

        # Reward function: value - payment (payment comes from the GSP module).
        bidder.pay_func[t] = [gsp_instance.pay_func(bidder.id, bid * bidder.eps)
                              for bid in range(bidder.bid_space)]

        #### WIN-EXP computations ####
        # The reward itself is only used for the regret.
        if allocated > threshold[t]:
            bidder.reward_func[t] = [values[t][0] - bidder.pay_func[t][b]
                                     for b in range(bidder.bid_space)]
            bidder.utility[t] = bidder.compute_utility(
                1, bidder.reward_func[t], bidder.alloc_func[t])
        else:
            bidder.reward_func[t] = [0 for _ in range(bidder.bid_space)]
            bidder.utility[t] = bidder.compute_utility(
                0, bidder.reward_func[t], bidder.alloc_func[t])

        bidder.weights, bidder.pi = bidder.weights_update_winexp(
            bidder.eta_winexp, bidder.utility[t])

        # For each auction (at the same t) the same arm is chosen.
        arm_chosen = int(math.ceil(bids[t][bidder.id] / bidder.eps))

        algo_util.append(
            bidder.reward_func[t][arm_chosen] * clean_alloc[t][arm_chosen])
        temp_regr.append(regret(bidder.reward_func, clean_alloc,
                                bidder.bid_space, algo_util, t))

    return temp_regr
def main_exp3(bidder, curr_rep, T, num_bidders, num_slots, outcome_space,
              rank_scores, ctr, reserve, values, bids, threshold, noise,
              num_adaptive):
    """Simulate T GSP rounds: bidder 0 learns via EXP3, the rest via WIN-EXP.

    Each round every adaptive bidder draws a bid and one GSP auction is
    built from the resulting bid vector. Bidder 0 applies an
    importance-weighted EXP3 update on the arm it played; bidders
    `1..num_adaptive-1` update through `weights_update_winexp`. Regret is
    tracked for bidder 0 only.

    Args:
        bidder: Indexable collection of bidder objects; entries
            `0..num_adaptive-1` are used and mutated in place.
        curr_rep: Not used by this function.
        T: Number of rounds to simulate.
        num_bidders: Forwarded to `GSP`.
        num_slots: Forwarded to `GSP` and `noise_mask`.
        outcome_space: Not used by this function.
        rank_scores: Per-round rank scores; `rank_scores[t]` goes to `GSP`.
        ctr: Per-round click-through rates; `ctr[t]` goes to `GSP` and
            `noise_mask`.
        reserve: Per-round reserve; `reserve[t]` goes to `GSP`.
        values: Per-round values used in the reward (see the review note in
            the WIN-EXP branch about which column is read).
        bids: `bids[t][i]` is overwritten with the bid drawn by bidder i.
        threshold: `threshold[t]` is the allocation level above which a
            bidder is treated as allocated in round t.
        noise: Per-round noise; a copy of `noise[t]` goes to `noise_mask`.
        num_adaptive: Number of learning bidders.

    Returns:
        A list with one entry per round: the result of `regret(...)` for
        bidder 0 after that round.
    """
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(T)]

    for t in range(T):
        # All bidders commit to a bid before any of them is written back.
        bid_chosen = [bidder[i].bidding() for i in range(num_adaptive)]
        for i in range(num_adaptive):
            bids[t][i] = bid_chosen[i]
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t],
                           num_slots, num_bidders)

        # Only entry 0 is ever filled in (by the EXP3 bidder).
        arm_chosen = [0] * num_adaptive

        for i in range(num_adaptive):
            me = bidder[i]

            allocated = gsp_instance.alloc_func(me.id, bids[t][me.id])
            temp = [gsp_instance.alloc_func(me.id, bid * me.eps)
                    for bid in range(me.bid_space)]
            if i == 0:
                # Noise-free curve, kept only for the regret of bidder 0.
                clean_alloc[t] = deepcopy(temp)

            # noise_mask receives a private copy of this round's noise only;
            # copying the entire noise schedule per bidder is unnecessary.
            me.alloc_func[t] = noise_mask(temp, deepcopy(noise[t]),
                                          ctr[t], num_slots)

            # Reward function: value - payment (payment from the GSP module).
            me.pay_func[t] = [gsp_instance.pay_func(me.id, bid * me.eps)
                              for bid in range(me.bid_space)]

            won = allocated > threshold[t]

            if i == 0:
                # ---- EXP3 bidder ----
                if won:
                    me.reward_func[t] = [values[t][i] - me.pay_func[t][b]
                                         for b in range(me.bid_space)]
                else:
                    me.reward_func[t] = [0 for _ in range(me.bid_space)]
                me.utility[t] = me.reward_func[t]

                # Weights update. The played arm's probability is floored so
                # the importance-weighted estimate cannot divide by ~0.
                arm_chosen[i] = int(math.ceil(bids[t][i] / me.eps))
                arm = arm_chosen[i]
                if me.pi[arm] < 0.0000000001:
                    me.pi[arm] = 0.0000000001
                estimated_loss = -me.utility[t][arm] / me.pi[arm]
                me.loss[arm] += estimated_loss

                arr = np.array([(-me.eta_exp3) * me.loss[b]
                                for b in range(me.bid_space)],
                               dtype=np.float128)
                me.weights = np.exp(arr)
                # The normaliser is the same for every arm: compute it once.
                total_weight = sum(me.weights)
                me.pi = [me.weights[b] / total_weight
                         for b in range(me.bid_space)]
            else:
                # ---- WIN-EXP bidders ----
                if won:
                    # NOTE(review): this reads values[t][0] for bidders with
                    # i > 0 (not values[t][i]) -- confirm this is intended.
                    me.reward_func[t] = [values[t][0] - me.pay_func[t][b]
                                         for b in range(me.bid_space)]
                    me.utility[t] = me.compute_utility(
                        1, me.reward_func[t], me.alloc_func[t])
                else:
                    me.reward_func[t] = [0 for _ in range(me.bid_space)]
                    me.utility[t] = me.compute_utility(
                        0, me.reward_func[t], me.alloc_func[t])

                me.weights, me.pi = me.weights_update_winexp(
                    me.eta_winexp, me.utility[t])

        lead_arm = arm_chosen[0]
        algo_util.append(
            bidder[0].reward_func[t][lead_arm] * clean_alloc[t][lead_arm])
        temp_regr.append(regret(bidder[0].reward_func, clean_alloc,
                                bidder[0].bid_space, algo_util, t))

    return temp_regr
def main_winexp(bidder, curr_rep, T, num_bidders, num_slots, outcome_space,
                rank_scores, ctr, reserve, values, bids, threshold, noise,
                num_adaptive):
    """Simulate T GSP rounds: bidder 0 learns via WIN-EXP, the rest via EXP3.

    Each round every adaptive bidder draws a bid and one GSP auction is
    built from the resulting bid vector. Bidder 0 updates through
    `weights_update_winexp`; bidders `1..num_adaptive-1` apply an
    importance-weighted EXP3 update on the arm they played. Regret is
    tracked for bidder 0 only.

    Args:
        bidder: Indexable collection of bidder objects; entries
            `0..num_adaptive-1` are used and mutated in place.
        curr_rep: Not used by this function.
        T: Number of rounds to simulate.
        num_bidders: Forwarded to `GSP`.
        num_slots: Forwarded to `GSP` and `noise_mask`.
        outcome_space: Not used by this function.
        rank_scores: Per-round rank scores; `rank_scores[t]` goes to `GSP`.
        ctr: Per-round click-through rates; `ctr[t]` goes to `GSP` and
            `noise_mask`.
        reserve: Per-round reserve; `reserve[t]` goes to `GSP`.
        values: `values[t][0]` is the value of bidder 0 and `values[t][i]`
            the value of EXP3 bidder i.
        bids: `bids[t][i]` is overwritten with the bid drawn by bidder i.
        threshold: `threshold[t]` is the allocation level above which a
            bidder is treated as allocated in round t.
        noise: Per-round noise; a copy of `noise[t]` goes to `noise_mask`.
        num_adaptive: Number of learning bidders.

    Returns:
        A list with one entry per round: the result of `regret(...)` for
        bidder 0 after that round.
    """
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(T)]

    for t in range(T):
        # All bidders commit to a bid before any of them is written back.
        bid_chosen = [bidder[i].bidding() for i in range(num_adaptive)]
        for i in range(num_adaptive):
            bids[t][i] = bid_chosen[i]
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t],
                           num_slots, num_bidders)

        for i in range(num_adaptive):
            me = bidder[i]

            # This is not reported to the bidder, and thus is free of noise.
            allocated = gsp_instance.alloc_func(me.id, bids[t][me.id])
            temp = [gsp_instance.alloc_func(me.id, bid * me.eps)
                    for bid in range(me.bid_space)]
            if i == 0:
                clean_alloc[t] = deepcopy(temp)

            # The bidder sees noisy data as its allocation. noise_mask gets
            # a private copy of this round's noise only; copying the entire
            # noise schedule per bidder is unnecessary.
            me.alloc_func[t] = noise_mask(temp, deepcopy(noise[t]),
                                          ctr[t], num_slots)

            # Reward function: value - payment (payment from the GSP module).
            me.pay_func[t] = [gsp_instance.pay_func(me.id, bid * me.eps)
                              for bid in range(me.bid_space)]

            won = allocated > threshold[t]

            if i == 0:
                #### WIN-EXP computations ####
                # The reward itself is only used for the regret.
                if won:
                    me.reward_func[t] = [values[t][0] - me.pay_func[t][b]
                                         for b in range(me.bid_space)]
                    me.utility[t] = me.compute_utility(
                        1, me.reward_func[t], me.alloc_func[t])
                else:
                    me.reward_func[t] = [0 for _ in range(me.bid_space)]
                    me.utility[t] = me.compute_utility(
                        0, me.reward_func[t], me.alloc_func[t])

                me.weights, me.pi = me.weights_update_winexp(
                    me.eta_winexp, me.utility[t])
            else:
                #### EXP3 computations ####
                if won:
                    me.reward_func[t] = [values[t][i] - me.pay_func[t][b]
                                         for b in range(me.bid_space)]
                else:
                    me.reward_func[t] = [0 for _ in range(me.bid_space)]
                me.utility[t] = me.reward_func[t]

                # Weights update. The played arm's probability is floored so
                # the importance-weighted estimate cannot divide by ~0.
                arm_chosen = int(math.ceil(bids[t][i] / me.eps))
                pi_floor = np.exp(-700)
                if me.pi[arm_chosen] < pi_floor:
                    me.pi[arm_chosen] = pi_floor
                estimated_loss = -me.utility[t][arm_chosen] / me.pi[arm_chosen]
                me.loss[arm_chosen] += estimated_loss

                arr = np.array([(-me.eta_exp3) * me.loss[b]
                                for b in range(me.bid_space)],
                               dtype=np.float128)
                me.weights = np.exp(arr)
                # The normaliser is the same for every arm: compute it once.
                total_weight = sum(me.weights)
                me.pi = [me.weights[b] / total_weight
                         for b in range(me.bid_space)]

        # For each auction (at the same t) the same arm is chosen.
        arm = int(math.ceil(bids[t][0] / bidder[0].eps))
        algo_util.append(bidder[0].reward_func[t][arm] * clean_alloc[t][arm])
        temp_regr.append(regret(bidder[0].reward_func, clean_alloc,
                                bidder[0].bid_space, algo_util, t))

    return temp_regr