def main_winexp(bidder,curr_rep, T,num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values,bids,threshold, noise,num_adaptive):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0,T)]
    clean_pay   = [[] for _ in range(0,T)]
    clean_reward = [[] for _ in range(0,T)]
    for t in range(0,T):
        bid_chosen = [bidder[i].bidding() for i in range(0,num_adaptive)]
        for i in range(0,num_adaptive): 
            bids[t][i] = bid_chosen[i]
        bid_vec = deepcopy(bids[t])
        gsp_instance =GSP(ctr[t], reserve[t], bid_vec, rank_scores[t], num_slots, num_bidders) 

        arm_chosen = [int(math.ceil(bids[t][i]/bidder[i].eps)) for i in range(0,num_adaptive)] 
        for i in range(0,num_adaptive):
            allocated = gsp_instance.alloc_func(bidder[i].id, bids[t][bidder[i].id])
            temp      = [gsp_instance.alloc_func(bidder[i].id, bid*bidder[i].eps)  for bid in range(0, bidder[i].bid_space)]
            if (i == 0):
                clean_alloc[t]          = deepcopy(temp)
                clean_pay[t]            = [gsp_instance.pay_func(bidder[i].id, bid*bidder[i].eps) for bid in range(0,bidder[i].bid_space)]
                
            temp_pay                = gsp_instance.pay_func(bidder[i].id, bid_vec[i])
            bidder[i].payment[t]    = temp_pay
            
            # bidder sees noisy data as his allocation
            noise_cp = deepcopy(noise)
            bidder[i].currbid[t]   = arm_chosen[i]*bidder[i].eps   
            if allocated > threshold[t]:
                bidder[i].allocated[t]                  = 1
                bidder[i].alloc_func[t]                 = compute_allocation_function(bidder[i].currbid[:t+1], bidder[i].allocated[:t+1], bidder[i].bid_space, bidder[i].eps)
                bidder[i].alloc_func[t][arm_chosen[i]]  = allocated
                bidder[i].pay_func[t]                   = compute_payment_function(bidder[i].currbid[:t+1], bidder[i].payment[:t+1], bidder[i].bid_space, bidder[i].eps)       
                bidder[i].pay_func[t][arm_chosen[i]]    = bidder[i].payment[t]
                temp_reward                             = [(values[t][0] - bidder[i].pay_func[t][b]) for b in range(0,bidder[i].bid_space)] 
                if (i == 0):
                    clean_reward[t]                     = [(values[t][0] - clean_pay[t][b]) for b in range(0,bidder[i].bid_space)]
                bidder[i].reward_func[t]   = normalize(temp_reward,bidder[i].bid_space,-1,1)
                bidder[i].utility[t]                    = (bidder[i].compute_utility(1, bidder[i].reward_func[t], bidder[i].alloc_func[t]))
            else:
                bidder[i].allocated[t] =0
                bidder[i].alloc_func[t]                 = compute_allocation_function(bidder[i].currbid[:t+1], bidder[i].allocated[:t+1], bidder[i].bid_space, bidder[i].eps)
                bidder[i].alloc_func[t][arm_chosen[i]]  = allocated
                bidder[i].payment[t]                    = 0 
                bidder[i].pay_func[t]                   = [0]*bidder[i].bid_space
                temp_reward                             = [0 for _ in range(0,bidder[i].bid_space)]
                bidder[i].reward_func[t]                = normalize(temp_reward,bidder[i].bid_space,-1,1)
                if (i == 0):
                    clean_reward[t]                     = [0 for _ in range(0,bidder[i].bid_space)]
                bidder[i].utility[t]                    = (bidder[i].compute_utility(0, bidder[i].reward_func[t], bidder[i].alloc_func[t]))
        
            (bidder[i].weights, bidder[i].pi) = bidder[i].weights_update_winexp(bidder[i].eta_winexp, bidder[i].utility[t])        
                

        algo_util.append((clean_reward[t][arm_chosen[0]]*clean_alloc[t][arm_chosen[0]]))
        temp_regr.append(regret(clean_reward,clean_alloc,bidder[0].bid_space, algo_util,t))    
        

    return temp_regr   
Example #2
0
def main_exp3(bidder, curr_rep, T, num_bidders, num_slots, outcome_space,
              rank_scores, ctr, reserve, values, bids, threshold, noise):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = bidder.bidding()
        bids[t][0] = bid_chosen
        bid_vec = deepcopy(bids[t])
        bidder.currbid[t] = bid_chosen
        currbid_cpy = deepcopy(bidder.currbid)

        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t],
                           num_slots, num_bidders)
        allocated = gsp_instance.alloc_func(bidder.id, bidder.currbid[t])
        bidder.payment[t] = gsp_instance.pay_func(bidder.id, currbid_cpy[t])
        clean_alloc[t] = [
            gsp_instance.alloc_func(bidder.id, b * bidder.eps)
            for b in range(0, bidder.bid_space)
        ]
        bidder.pay_func[t] = [
            gsp_instance.pay_func(bidder.id, b * bidder.eps)
            for b in range(0, bidder.bid_space)
        ]
        bidder.alloc_func[t] = deepcopy(clean_alloc[t])

        arm_chosen = int(math.ceil(bids[t][0] / bidder.eps))
        #reward function: value - payment(coming from GSP module)
        if allocated > threshold[t]:
            bidder.reward_func[t] = [(values[t][0] - bidder.pay_func[t][b])
                                     for b in range(0, bidder.bid_space)]
        else:
            bidder.reward_func[t] = [0 for _ in range(0, bidder.bid_space)]

        bidder.utility[t] = bidder.reward_func[t]

        #weights update

        if bidder.pi[arm_chosen] < 0.0000000001:
            bidder.pi[arm_chosen] = 0.0000000001
        estimated_loss = -bidder.utility[t][arm_chosen] / bidder.pi[arm_chosen]
        bidder.loss[arm_chosen] += estimated_loss
        arr = np.array([(-bidder.eta_exp3) * bidder.loss[b]
                        for b in range(0, bidder.bid_space)],
                       dtype=np.float128)
        bidder.weights = np.exp(arr)
        bidder.pi = [
            bidder.weights[b] / sum(bidder.weights)
            for b in range(0, bidder.bid_space)
        ]

        algo_util.append(
            (bidder.reward_func[t][arm_chosen] * clean_alloc[t][arm_chosen]))
        temp_regr.append(
            regret(bidder.reward_func, clean_alloc, bidder.bid_space,
                   algo_util, t))

    return temp_regr
def main_winexp(bidder, curr_rep, T, num_bidders, num_slots, outcome_space,
                rank_scores, ctr, reserve, values, bids, threshold, noise):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = bidder.bidding()
        bids[t][0] = bid_chosen
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t],
                           num_slots, num_bidders)
        # this is not reported to the bidder, and thus is cleaned of noise
        allocated = gsp_instance.alloc_func(bidder.id, bids[t][bidder.id])
        clean_alloc[t] = [
            gsp_instance.alloc_func(bidder.id, bid * bidder.eps)
            for bid in range(0, bidder.bid_space)
        ]

        temp = deepcopy(clean_alloc[t])

        # bidder sees noisy data as his allocation
        noise_cp = deepcopy(noise)
        bidder.alloc_func[t] = noise_mask(temp, noise_cp[t], ctr[t], num_slots)

        #reward function: value - payment(coming from GSP module)
        bidder.pay_func[t] = [
            gsp_instance.pay_func(bidder.id, bid * bidder.eps)
            for bid in range(0, bidder.bid_space)
        ]
        #### WIN-EXP computations ####
        # computation of reward will only be used for the regret
        if allocated > threshold[t]:
            bidder.reward_func[t] = [(values[t][0] - bidder.pay_func[t][b])
                                     for b in range(0, bidder.bid_space)]
            bidder.utility[t] = bidder.compute_utility(1,
                                                       bidder.reward_func[t],
                                                       bidder.alloc_func[t])
        else:
            bidder.reward_func[t] = [0 for _ in range(0, bidder.bid_space)]
            bidder.utility[t] = (bidder.compute_utility(
                0, bidder.reward_func[t], bidder.alloc_func[t]))

        (bidder.weights,
         bidder.pi) = bidder.weights_update_winexp(bidder.eta_winexp,
                                                   bidder.utility[t])
        # for each auction (at the same t) you choose the same arm
        arm_chosen = int(math.ceil(bids[t][bidder.id] / bidder.eps))

        algo_util.append(
            (bidder.reward_func[t][arm_chosen] * clean_alloc[t][arm_chosen]))
        temp_regr.append(
            regret(bidder.reward_func, clean_alloc, bidder.bid_space,
                   algo_util, t))

    return temp_regr
Example #4
0
def main_exp3(bidder,curr_rep, T,num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values,bids,threshold,noise,num_adaptive):
    algo_util  = []
    temp_regr  = []
    clean_alloc = [[] for _ in range(0,T)]
    for t in range(0,T):
        bid_chosen = [bidder[i].bidding() for i in range(0,num_adaptive)]
        for i in range(0,num_adaptive): 
            bids[t][i] = bid_chosen[i]
        bid_vec = deepcopy(bids[t])
        gsp_instance =GSP(ctr[t], reserve[t], bid_vec, rank_scores[t], num_slots, num_bidders) 
        arm_chosen =[0]*num_adaptive
        for i in range(0,num_adaptive):
            allocated = gsp_instance.alloc_func(bidder[i].id, bids[t][bidder[i].id])
            temp      = [gsp_instance.alloc_func(bidder[i].id, bid*bidder[i].eps)  for bid in range(0, bidder[i].bid_space)]
            if (i == 0):
                clean_alloc[t] = deepcopy(temp)        

            noise_cp = deepcopy(noise)
            bidder[i].alloc_func[t] = noise_mask(temp,noise_cp[t],ctr[t], num_slots)
            #reward function: value - payment(coming from GSP module)
            bidder[i].pay_func[t] = [gsp_instance.pay_func(bidder[i].id, bid*bidder[i].eps) for bid in range(0, bidder[i].bid_space)]  
            if (i == 0):
                if allocated > threshold[t]:    
                    bidder[i].reward_func[t] = [(values[t][i] - bidder[i].pay_func[t][b]) for b in range(0,bidder[i].bid_space)] 
                else:
                    bidder[i].reward_func[t] = [0 for _ in range(0,bidder[i].bid_space)]

                bidder[i].utility[t] = bidder[i].reward_func[t]

                #weights update
                arm_chosen[i] = int(math.ceil(bids[t][i]/bidder[i].eps))
                
                if bidder[i].pi[arm_chosen[i]] < 0.0000000001:
                    bidder[i].pi[arm_chosen[i]] = 0.0000000001
                estimated_loss = -bidder[i].utility[t][arm_chosen[i]]/bidder[i].pi[arm_chosen[i]]
                bidder[i].loss[arm_chosen[i]] += estimated_loss
                arr = np.array([(-bidder[i].eta_exp3)*bidder[i].loss[b] for b in range(0,bidder[i].bid_space)], dtype=np.float128)
                bidder[i].weights = np.exp(arr)
                bidder[i].pi = [bidder[i].weights[b]/sum(bidder[i].weights) for b in range(0,bidder[i].bid_space)]
            else: 
                if allocated > threshold[t]:    
                    bidder[i].reward_func[t] = [(values[t][0] - bidder[i].pay_func[t][b]) for b in range(0,bidder[i].bid_space)] 
                    bidder[i].utility[t] = bidder[i].compute_utility(1, bidder[i].reward_func[t], bidder[i].alloc_func[t])
                else:
                    bidder[i].reward_func[t] = [0 for _ in range(0,bidder[i].bid_space)]
                    bidder[i].utility[t] = (bidder[i].compute_utility(0, bidder[i].reward_func[t], bidder[i].alloc_func[t]))


                (bidder[i].weights, bidder[i].pi) = bidder[i].weights_update_winexp(bidder[i].eta_winexp, bidder[i].utility[t])        
        
        
        algo_util.append((bidder[0].reward_func[t][arm_chosen[0]]*clean_alloc[t][arm_chosen[0]]))
        temp_regr.append(regret(bidder[0].reward_func,clean_alloc,bidder[0].bid_space, algo_util,t))    

    return temp_regr   
def main_winexp(bidder, curr_rep, T, num_bidders, num_slots, outcome_space,
                rank_scores, ctr, reserve, values, bids, threshold, noise,
                num_adaptive):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = [bidder[i].bidding() for i in range(0, num_adaptive)]
        for i in range(0, num_adaptive):
            bids[t][i] = bid_chosen[i]
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t],
                           num_slots, num_bidders)
        # this is not reported to the bidder, and thus is cleaned of noise
        for i in range(0, num_adaptive):
            allocated = gsp_instance.alloc_func(bidder[i].id,
                                                bids[t][bidder[i].id])
            temp = [
                gsp_instance.alloc_func(bidder[i].id, bid * bidder[i].eps)
                for bid in range(0, bidder[i].bid_space)
            ]

            if (i == 0):
                clean_alloc[t] = deepcopy(temp)
            # bidder sees noisy data as his allocation
            noise_cp = deepcopy(noise)
            bidder[i].alloc_func[t] = noise_mask(temp, noise_cp[t], ctr[t],
                                                 num_slots)

            #reward function: value - payment(coming from GSP module)
            bidder[i].pay_func[t] = [
                gsp_instance.pay_func(bidder[i].id, bid * bidder[i].eps)
                for bid in range(0, bidder[i].bid_space)
            ]
            #### WIN-EXP computations ####
            # computation of reward will only be used for the regret
            if (i == 0):
                if allocated > threshold[t]:
                    bidder[i].reward_func[t] = [
                        (values[t][0] - bidder[i].pay_func[t][b])
                        for b in range(0, bidder[i].bid_space)
                    ]
                    bidder[i].utility[t] = bidder[i].compute_utility(
                        1, bidder[i].reward_func[t], bidder[i].alloc_func[t])
                else:
                    bidder[i].reward_func[t] = [
                        0 for _ in range(0, bidder[i].bid_space)
                    ]
                    bidder[i].utility[t] = (bidder[i].compute_utility(
                        0, bidder[i].reward_func[t], bidder[i].alloc_func[t]))

                (bidder[i].weights,
                 bidder[i].pi) = bidder[i].weights_update_winexp(
                     bidder[i].eta_winexp, bidder[i].utility[t])

            else:
                if allocated > threshold[t]:
                    bidder[i].reward_func[t] = [
                        (values[t][i] - bidder[i].pay_func[t][b])
                        for b in range(0, bidder[i].bid_space)
                    ]
                else:
                    bidder[i].reward_func[t] = [
                        0 for _ in range(0, bidder[i].bid_space)
                    ]

                bidder[i].utility[t] = bidder[i].reward_func[t]

                #weights update
                arm_chosen = int(math.ceil(bids[t][i] / bidder[i].eps))

                if bidder[i].pi[arm_chosen] < np.exp(-700):
                    bidder[i].pi[arm_chosen] = np.exp(-700)
                estimated_loss = -bidder[i].utility[t][arm_chosen] / bidder[
                    i].pi[arm_chosen]
                bidder[i].loss[arm_chosen] += estimated_loss
                arr = np.array([(-bidder[i].eta_exp3) * bidder[i].loss[b]
                                for b in range(0, bidder[i].bid_space)],
                               dtype=np.float128)
                bidder[i].weights = np.exp(arr)
                bidder[i].pi = [
                    bidder[i].weights[b] / sum(bidder[i].weights)
                    for b in range(0, bidder[i].bid_space)
                ]

        # for each auction (at the same t) you choose the same arm
        arm = int(math.ceil(bids[t][0] / bidder[0].eps))

        algo_util.append((bidder[0].reward_func[t][arm] * clean_alloc[t][arm]))
        temp_regr.append(
            regret(bidder[0].reward_func, clean_alloc, bidder[0].bid_space,
                   algo_util, t))

    return temp_regr