def main_exp3(principal, agents, oracle, resp_lst, curr_rep, T, num_agents, d):
    temp_regr = []
    algo_loss = []
    # resp_lst: |calA| x T
    for t in range(T):
        (a_t, arm_chosen) = principal.choose_action()
        resp = agents[t].response(a_t, d)
        principal.loss_func[t] = oracle.compute_loss(resp_lst, t)
        # importance-weighted (unbiased) estimate of the loss of the arm played
        estimated_loss = 1.0 * principal.loss_func[t][arm_chosen] / principal.pi[arm_chosen]
        principal.est_loss[arm_chosen] += estimated_loss
        # exponential-weights update on the cumulative estimated losses
        arr = np.array([(-principal.eta_exp3) * principal.est_loss[i]
                        for i in range(principal.calA_size)],
                       dtype=np.float128)
        principal.weights = np.exp(arr)
        principal.pi = [principal.weights[i] / sum(principal.weights)
                        for i in range(principal.calA_size)]
        # prevent division by almost 0
        for j in range(principal.calA_size):
            if (principal.pi[j] < 0.00000001):
                principal.pi[j] = 0.00000001
        algo_loss.append(principal.loss_func[t][arm_chosen])
        temp_regr.append(regret(principal.loss_func, principal.calA, algo_loss, t))
    return temp_regr

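# Hedged sketch (not part of the original code): the regret(...) helper called above is not
# shown in this excerpt. Assuming it returns the algorithm's cumulative loss minus the
# cumulative loss of the best fixed action in hindsight, a plausible version is:
def regret_principal_sketch(loss_func, calA, algo_loss, t):
    """Cumulative algorithm loss minus the loss of the best fixed action, up to round t."""
    best_fixed = min(sum(loss_func[s][i] for s in range(t + 1)) for i in range(len(calA)))
    return sum(algo_loss[:t + 1]) - best_fixed
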
def main_exp3(bidder, curr_rep, T, num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values, bids, threshold, noise):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = bidder.bidding()
        bids[t][0] = bid_chosen
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t], num_slots, num_bidders)
        allocated = gsp_instance.alloc_func(bidder.id, bids[t][bidder.id])
        clean_alloc[t] = [gsp_instance.alloc_func(bidder.id, bid * bidder.eps)
                          for bid in range(0, bidder.bid_space)]
        temp_alloc = deepcopy(clean_alloc[t])
        noise_cp = deepcopy(noise)
        # the bidder only observes a noisy version of his allocation curve
        bidder.alloc_func[t] = noise_mask(temp_alloc, noise_cp[t], ctr[t], num_slots)
        # reward function: value - payment (coming from the GSP module)
        bidder.pay_func[t] = [gsp_instance.pay_func(bidder.id, bid * bidder.eps)
                              for bid in range(0, bidder.bid_space)]
        if allocated > threshold[t]:
            bidder.reward_func[t] = [(values[t][0] - bidder.pay_func[t][b])
                                     for b in range(0, bidder.bid_space)]
        else:
            bidder.reward_func[t] = [0 for _ in range(0, bidder.bid_space)]
        bidder.utility[t] = normalize(bidder.reward_func[t], bidder.bid_space, 0, 1)

        # weights update
        arm_chosen = int(math.ceil(bids[t][0] / bidder.eps))
        if bidder.pi[arm_chosen] < 0.0000000001:
            bidder.pi[arm_chosen] = 0.0000000001
        estimated_loss = bidder.utility[t][arm_chosen] / bidder.pi[arm_chosen]
        bidder.loss[arm_chosen] += estimated_loss
        arr = np.array([(-bidder.eta_exp3) * bidder.loss[b]
                        for b in range(0, bidder.bid_space)],
                       dtype=np.float128)
        bidder.weights = np.exp(arr)
        bidder.pi = [bidder.weights[b] / sum(bidder.weights)
                     for b in range(0, bidder.bid_space)]

        # regret is computed against the clean (noiseless) allocation curve
        algo_util.append(bidder.reward_func[t][arm_chosen] * clean_alloc[t][arm_chosen])
        temp_regr.append(regret(bidder.reward_func, clean_alloc, bidder.bid_space, algo_util, t))
    return temp_regr

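# Hedged sketch (assumption, not the repository's implementation): normalize() above is used
# to rescale the per-bid reward vector into a target interval [lo, hi]. One minimal version
# consistent with that usage:
def normalize_sketch(rewards, bid_space, lo, hi):
    """Affinely rescale a length-bid_space reward vector into [lo, hi]."""
    r_min, r_max = min(rewards), max(rewards)
    if r_max == r_min:
        # degenerate case: all rewards are equal, so map everything to the lower endpoint
        return [lo for _ in range(bid_space)]
    return [lo + (hi - lo) * (rewards[b] - r_min) / (r_max - r_min) for b in range(bid_space)]
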
def main_winexp(bidder, curr_rep, T, num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values, bids, threshold, noise, num_adaptive):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    clean_pay = [[] for _ in range(0, T)]
    clean_reward = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = [bidder[i].bidding() for i in range(0, num_adaptive)]
        for i in range(0, num_adaptive):
            bids[t][i] = bid_chosen[i]
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t], num_slots, num_bidders)
        arm_chosen = [int(math.ceil(bids[t][i] / bidder[i].eps)) for i in range(0, num_adaptive)]
        for i in range(0, num_adaptive):
            allocated = gsp_instance.alloc_func(bidder[i].id, bids[t][bidder[i].id])
            temp = [gsp_instance.alloc_func(bidder[i].id, bid * bidder[i].eps)
                    for bid in range(0, bidder[i].bid_space)]
            if (i == 0):
                clean_alloc[t] = deepcopy(temp)
                clean_pay[t] = [gsp_instance.pay_func(bidder[i].id, bid * bidder[i].eps)
                                for bid in range(0, bidder[i].bid_space)]
            temp_pay = gsp_instance.pay_func(bidder[i].id, bid_vec[i])
            bidder[i].payment[t] = temp_pay
            # the bidder never sees the clean curves; his allocation/payment estimates are
            # built below from his own past (binary) win/lose feedback
            noise_cp = deepcopy(noise)
            bidder[i].currbid[t] = arm_chosen[i] * bidder[i].eps
            if allocated > threshold[t]:
                bidder[i].allocated[t] = 1
                bidder[i].alloc_func[t] = compute_allocation_function(bidder[i].currbid[:t + 1],
                                                                      bidder[i].allocated[:t + 1],
                                                                      bidder[i].bid_space, bidder[i].eps)
                bidder[i].alloc_func[t][arm_chosen[i]] = allocated
                bidder[i].pay_func[t] = compute_payment_function(bidder[i].currbid[:t + 1],
                                                                 bidder[i].payment[:t + 1],
                                                                 bidder[i].bid_space, bidder[i].eps)
                bidder[i].pay_func[t][arm_chosen[i]] = bidder[i].payment[t]
                temp_reward = [(values[t][0] - bidder[i].pay_func[t][b])
                               for b in range(0, bidder[i].bid_space)]
                if (i == 0):
                    clean_reward[t] = [(values[t][0] - clean_pay[t][b])
                                       for b in range(0, bidder[i].bid_space)]
                bidder[i].reward_func[t] = normalize(temp_reward, bidder[i].bid_space, -1, 1)
                bidder[i].utility[t] = bidder[i].compute_utility(1, bidder[i].reward_func[t],
                                                                 bidder[i].alloc_func[t])
            else:
                bidder[i].allocated[t] = 0
                bidder[i].alloc_func[t] = compute_allocation_function(bidder[i].currbid[:t + 1],
                                                                      bidder[i].allocated[:t + 1],
                                                                      bidder[i].bid_space, bidder[i].eps)
                bidder[i].alloc_func[t][arm_chosen[i]] = allocated
                bidder[i].payment[t] = 0
                bidder[i].pay_func[t] = [0] * bidder[i].bid_space
                temp_reward = [0 for _ in range(0, bidder[i].bid_space)]
                bidder[i].reward_func[t] = normalize(temp_reward, bidder[i].bid_space, -1, 1)
                if (i == 0):
                    clean_reward[t] = [0 for _ in range(0, bidder[i].bid_space)]
                bidder[i].utility[t] = bidder[i].compute_utility(0, bidder[i].reward_func[t],
                                                                 bidder[i].alloc_func[t])
            (bidder[i].weights, bidder[i].pi) = bidder[i].weights_update_winexp(bidder[i].eta_winexp,
                                                                                bidder[i].utility[t])
        # regret is measured for bidder 0 against the clean (noiseless) curves
        algo_util.append(clean_reward[t][arm_chosen[0]] * clean_alloc[t][arm_chosen[0]])
        temp_regr.append(regret(clean_reward, clean_alloc, bidder[0].bid_space, algo_util, t))
    return temp_regr

def main_winexp(bidder, curr_rep, T, num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values, bids, threshold, noise):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = bidder.bidding()
        bids[t][0] = bid_chosen
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t], num_slots, num_bidders)
        # this is not reported to the bidder, and thus is cleaned of noise
        allocated = gsp_instance.alloc_func(bidder.id, bids[t][bidder.id])
        clean_alloc[t] = [gsp_instance.alloc_func(bidder.id, bid * bidder.eps)
                          for bid in range(0, bidder.bid_space)]
        temp = deepcopy(clean_alloc[t])
        # bidder sees noisy data as his allocation
        noise_cp = deepcopy(noise)
        bidder.alloc_func[t] = noise_mask(temp, noise_cp[t], ctr[t], num_slots)
        # reward function: value - payment (coming from the GSP module)
        bidder.pay_func[t] = [gsp_instance.pay_func(bidder.id, bid * bidder.eps)
                              for bid in range(0, bidder.bid_space)]

        #### WIN-EXP computations ####
        # computation of reward will only be used for the regret
        if allocated > threshold[t]:
            bidder.reward_func[t] = [(values[t][0] - bidder.pay_func[t][b])
                                     for b in range(0, bidder.bid_space)]
            bidder.utility[t] = bidder.compute_utility(1, bidder.reward_func[t], bidder.alloc_func[t])
        else:
            bidder.reward_func[t] = [0 for _ in range(0, bidder.bid_space)]
            bidder.utility[t] = bidder.compute_utility(0, bidder.reward_func[t], bidder.alloc_func[t])
        (bidder.weights, bidder.pi) = bidder.weights_update_winexp(bidder.eta_winexp, bidder.utility[t])

        # for each auction (at the same t) you choose the same arm
        arm_chosen = int(math.ceil(bids[t][bidder.id] / bidder.eps))
        algo_util.append(bidder.reward_func[t][arm_chosen] * clean_alloc[t][arm_chosen])
        temp_regr.append(regret(bidder.reward_func, clean_alloc, bidder.bid_space, algo_util, t))
    return temp_regr

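# Hedged sketch (assumption): weights_update_winexp is a Bidder method that is not shown in
# this excerpt. From the way it is called, it presumably performs an exponential-weights step
# on the per-bid utility estimates, w_b <- w_b * exp(eta * u_t[b]), followed by normalization;
# the actual method may differ in details (e.g. how it clips tiny probabilities).
import numpy as np

def weights_update_winexp_sketch(weights, eta, utility):
    """One exponential-weights step; returns the new weights and the induced distribution."""
    new_w = np.asarray(weights, dtype=float) * np.exp(eta * np.asarray(utility, dtype=float))
    pi = (new_w / new_w.sum()).tolist()
    return (new_w.tolist(), pi)
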
def main_exp3(bidder, curr_rep, T, num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values, bids, threshold, noise, num_adaptive):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = [bidder[i].bidding() for i in range(0, num_adaptive)]
        for i in range(0, num_adaptive):
            bids[t][i] = bid_chosen[i]
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t], num_slots, num_bidders)
        arm_chosen = [0] * num_adaptive
        for i in range(0, num_adaptive):
            allocated = gsp_instance.alloc_func(bidder[i].id, bids[t][bidder[i].id])
            temp = [gsp_instance.alloc_func(bidder[i].id, bid * bidder[i].eps)
                    for bid in range(0, bidder[i].bid_space)]
            if (i == 0):
                clean_alloc[t] = deepcopy(temp)
            noise_cp = deepcopy(noise)
            bidder[i].alloc_func[t] = noise_mask(temp, noise_cp[t], ctr[t], num_slots)
            # reward function: value - payment (coming from the GSP module)
            bidder[i].pay_func[t] = [gsp_instance.pay_func(bidder[i].id, bid * bidder[i].eps)
                                     for bid in range(0, bidder[i].bid_space)]
            if (i == 0):
                # bidder 0 runs EXP3
                if allocated > threshold[t]:
                    bidder[i].reward_func[t] = [(values[t][i] - bidder[i].pay_func[t][b])
                                                for b in range(0, bidder[i].bid_space)]
                else:
                    bidder[i].reward_func[t] = [0 for _ in range(0, bidder[i].bid_space)]
                bidder[i].utility[t] = bidder[i].reward_func[t]

                # weights update
                arm_chosen[i] = int(math.ceil(bids[t][i] / bidder[i].eps))
                if bidder[i].pi[arm_chosen[i]] < 0.0000000001:
                    bidder[i].pi[arm_chosen[i]] = 0.0000000001
                estimated_loss = -bidder[i].utility[t][arm_chosen[i]] / bidder[i].pi[arm_chosen[i]]
                bidder[i].loss[arm_chosen[i]] += estimated_loss
                arr = np.array([(-bidder[i].eta_exp3) * bidder[i].loss[b]
                                for b in range(0, bidder[i].bid_space)],
                               dtype=np.float128)
                bidder[i].weights = np.exp(arr)
                bidder[i].pi = [bidder[i].weights[b] / sum(bidder[i].weights)
                                for b in range(0, bidder[i].bid_space)]
            else:
                # the remaining adaptive bidders run WIN-EXP
                if allocated > threshold[t]:
                    bidder[i].reward_func[t] = [(values[t][0] - bidder[i].pay_func[t][b])
                                                for b in range(0, bidder[i].bid_space)]
                    bidder[i].utility[t] = bidder[i].compute_utility(1, bidder[i].reward_func[t],
                                                                     bidder[i].alloc_func[t])
                else:
                    bidder[i].reward_func[t] = [0 for _ in range(0, bidder[i].bid_space)]
                    bidder[i].utility[t] = bidder[i].compute_utility(0, bidder[i].reward_func[t],
                                                                     bidder[i].alloc_func[t])
                (bidder[i].weights, bidder[i].pi) = bidder[i].weights_update_winexp(bidder[i].eta_winexp,
                                                                                    bidder[i].utility[t])
        algo_util.append(bidder[0].reward_func[t][arm_chosen[0]] * clean_alloc[t][arm_chosen[0]])
        temp_regr.append(regret(bidder[0].reward_func, clean_alloc, bidder[0].bid_space, algo_util, t))
    return temp_regr

def main_hedge(num_experts, outcomes, experts_reports, T, rep, sample_id):
    hedge_weights = [1.0] * num_experts
    hedge_probs = [1.0 / num_experts] * num_experts
    experts_loss_lst = [[0] * num_experts for _ in range(T)]
    hedge_loss = [0] * T
    avg_loss = [0] * T
    hedge_weighted_loss = [0] * T
    hedge_rep_regr = []
    eta = 1.0 / 4.0
    for t in range(T):
        print("Timestep t=%d for Hedge" % t)
        exp_chosen = draw(hedge_probs, 0, num_experts)
        experts_loss_lst[t] = [(outcomes[t] - experts_reports[i][t])**2
                               for i in range(num_experts)]
        hedge_loss[t] = experts_loss_lst[t][exp_chosen]
        # average report for this round
        avg_rep = 1.0 * sum([experts_reports[i][t] for i in range(num_experts)]) / num_experts
        hedge_weighted_rep = 1.0 * sum([hedge_probs[i] * experts_reports[i][t]
                                        for i in range(num_experts)])
        hedge_weighted_loss[t] = (outcomes[t] - hedge_weighted_rep)**2
        # weight update
        temp = [hedge_weights[i] * np.exp(-eta * experts_loss_lst[t][i])
                for i in range(num_experts)]
        hedge_weights = temp
        # probs update
        hedge_probs = [1.0 * hedge_weights[i] / sum(hedge_weights)
                       for i in range(num_experts)]
        (regr_best, bf) = regret(experts_loss_lst, num_experts, hedge_loss, t)
        hedge_rep_regr.append(regr_best)
    return (sample_id, num_experts, hedge_rep_regr,
            [sum(hedge_loss[:t + 1]) for t in range(T)],
            [sum(hedge_weighted_loss[:t + 1]) for t in range(T)])

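# Hedged sketch (assumption): draw(probs, gamma, n) is not defined in this excerpt. From its
# usage here and in the functions below, it plausibly samples an index from probs mixed with a
# gamma-fraction of the uniform distribution (gamma = 0 means sampling from probs directly).
import numpy as np

def draw_sketch(probs, gamma, n):
    """Sample an index in [0, n) from (1 - gamma) * probs + gamma * uniform."""
    mixed = np.array([(1.0 - gamma) * probs[i] + gamma / n for i in range(n)], dtype=float)
    return int(np.random.choice(n, p=mixed / mixed.sum()))
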
def main_wsux(num_experts, outcomes, experts_reports, T, rep, sample_id):
    wsux_probs = [1.0 / num_experts] * num_experts
    experts_loss_lst = [[0] * num_experts for _ in range(T)]
    wsux_loss = [0] * T
    wsux_weighted_loss = [0] * T
    wsux_rep_regr = []
    best_fixed_loss = []
    est_loss = [[0] * num_experts for _ in range(T)]
    eta = (1.0 * np.log(num_experts) / (4 * np.sqrt(num_experts) * T))**(2.0 / 3.0)
    gamma = np.sqrt(1.0 * eta * num_experts)
    for t in range(T):
        print("Timestep t=%d for WSU-UX" % t)
        exp_chosen = draw(wsux_probs, gamma, num_experts)
        experts_loss_lst[t] = [(outcomes[t] - experts_reports[i][t])**2
                               for i in range(num_experts)]
        wsux_loss[t] = experts_loss_lst[t][exp_chosen]
        est_loss[t][exp_chosen] = 1.0 * experts_loss_lst[t][exp_chosen] / wsux_probs[exp_chosen]
        # probs update through wswm
        cpy = deepcopy(wsux_probs)
        temp = wswm_compute(wsux_probs, experts_reports, num_experts, outcomes, t)
        wsux_probs = [eta * temp[i] + (1.0 - eta) * cpy[i] for i in range(num_experts)]
        (regr_best, best_fixed) = regret(experts_loss_lst, num_experts, wsux_loss, t)
        wsux_rep_regr.append(regr_best)
        best_fixed_loss.append(best_fixed)
    return (sample_id, num_experts, wsux_rep_regr,
            [sum(wsux_loss[:t + 1]) for t in range(T)],
            [sum(wsux_weighted_loss[:t + 1]) for t in range(T)],
            best_fixed_loss)

def main_elf(num_experts, outcomes, experts_reports, T, rep, sample_id):
    experts_loss_lst = [[] for _ in range(T)]
    elf_loss = [0] * T
    avg_loss = [0] * T
    elf_rep_regr = []
    best_fixed_loss = []
    wins_for_master_file = [[0 for _ in range(T)] for _ in range(num_experts)]
    elf_probs_lst = [[] for _ in range(T)]
    elf_probs_lst[0] = [1.0 / num_experts] * num_experts
    for t in range(T):
        print("Timestep t=%d for ELF." % t)
        exp_chosen_lst = draw_rec(elf_probs_lst, 0.0, t)
        experts_loss_lst[t] = [(outcomes[t] - experts_reports[i][t])**2
                               for i in range(num_experts)]
        # at the current timestep, choose the expert with the most wins
        (curr_exp_chosen, wins_lst) = most_wins(exp_chosen_lst, num_experts)
        elf_loss[t] = experts_loss_lst[t][curr_exp_chosen]
        wins_for_master_file[curr_exp_chosen][t] += 1
        # update the elf probs lst
        wagers = [1.0 / num_experts] * num_experts
        new_probs_lst = [a for a in wswm_compute(wagers, experts_reports, num_experts, outcomes, t)]
        elf_probs_lst[t] = new_probs_lst
        # regret computations
        (regr_best, best_fixed) = regret(experts_loss_lst, num_experts, elf_loss, t)
        elf_rep_regr.append(regr_best)
        best_fixed_loss.append(best_fixed)
    return (sample_id, num_experts, elf_rep_regr,
            [sum(elf_loss[:t + 1]) for t in range(T)],
            wins_for_master_file, best_fixed_loss)

def main_wswm(num_experts, outcomes, experts_reports, T, rep, sample_id):
    wswm_probs = [1.0 / num_experts] * num_experts
    experts_loss_lst = [[] for _ in range(T)]
    wswm_loss = [0] * T
    wswm_weighted_loss = [0] * T
    wswm_rep_regr = []
    best_fixed_loss = []
    eta = np.sqrt(1.0 * np.log(num_experts) / (1.0 * T))
    for t in range(T):
        print("Timestep t=%d for WSWM" % t)
        exp_chosen = draw(wswm_probs, 0, num_experts)
        experts_loss_lst[t] = [(outcomes[t] - experts_reports[i][t])**2
                               for i in range(num_experts)]
        wswm_loss[t] = experts_loss_lst[t][exp_chosen]
        # loss of <wswm weighted avg rep> - loss of <simple avg rep>
        wswm_weighted_rep = 1.0 * sum([wswm_probs[i] * experts_reports[i][t]
                                       for i in range(num_experts)])
        wswm_weighted_loss[t] = (outcomes[t] - wswm_weighted_rep)**2
        # probs update through wswm
        cpy = deepcopy(wswm_probs)
        temp = wswm_compute(wswm_probs, experts_reports, num_experts, outcomes, t)
        wswm_probs = [eta * temp[i] + (1.0 - eta) * cpy[i] for i in range(num_experts)]
        (regr_best, best_fixed) = regret(experts_loss_lst, num_experts, wswm_loss, t)
        wswm_rep_regr.append(regr_best)
        best_fixed_loss.append(best_fixed)
    return (sample_id, num_experts, wswm_rep_regr,
            [sum(wswm_loss[:t + 1]) for t in range(T)],
            [sum(wswm_weighted_loss[:t + 1]) for t in range(T)],
            best_fixed_loss)

def main_exp3(num_experts, outcomes, experts_reports, T, rep, sample_id):
    exp3_weights = [1.0] * num_experts
    exp3_probs = [1.0 / num_experts] * num_experts
    experts_loss_lst = [[0] * num_experts for _ in range(T)]
    est_loss = [[0] * num_experts for _ in range(T)]
    avg_loss = [0] * T
    exp3_loss = [0] * T
    exp3_rep_regr = []
    eta = np.sqrt(2 * np.log(num_experts) / (num_experts * T))
    for t in range(T):
        print("Timestep t=%d for EXP3" % t)
        exp_chosen = draw(exp3_probs, 0, num_experts)
        experts_loss_lst[t] = [(outcomes[t] - experts_reports[i][t])**2
                               for i in range(num_experts)]
        # unbiased estimator
        est_loss[t][exp_chosen] = 1.0 * experts_loss_lst[t][exp_chosen] / exp3_probs[exp_chosen]
        exp3_loss[t] = experts_loss_lst[t][exp_chosen]
        # weight update according to the estimated losses
        temp = [exp3_weights[i] * np.exp(-eta * est_loss[t][i])
                for i in range(num_experts)]
        exp3_weights = temp
        # probs update
        exp3_probs = [1.0 * exp3_weights[i] / sum(exp3_weights)
                      for i in range(num_experts)]
        (regr_best, bf) = regret(experts_loss_lst, num_experts, exp3_loss, t)
        exp3_rep_regr.append(regr_best)
    return (sample_id, num_experts, exp3_rep_regr,
            [sum(exp3_loss[:t + 1]) for t in range(T)])

def main_mwu(num_experts, outcomes, experts_reports, T, rep, sample_id):
    mwu_weights = [1.0] * num_experts
    mwu_probs = [1.0 / num_experts] * num_experts
    experts_loss_lst = [[0] * num_experts for _ in range(T)]
    mwu_loss = [0] * T
    avg_loss = [0] * T
    mwu_weighted_loss = [0] * T
    uniform_fixed_loss = [0] * T
    mwu_rep_regr = []
    eta = np.sqrt(1.0 * np.log(num_experts) / (1.0 * T))
    for t in range(T):
        print("Timestep t=%d for MWU" % t)
        exp_chosen = draw(mwu_probs, 0, num_experts)
        experts_loss_lst[t] = [(outcomes[t] - experts_reports[i][t])**2
                               for i in range(num_experts)]
        mwu_loss[t] = experts_loss_lst[t][exp_chosen]
        # average report for this round
        avg_rep = 1.0 * sum([experts_reports[i][t] for i in range(num_experts)]) / num_experts
        # loss of <mwu weighted avg rep> - loss of <simple avg rep>
        uniform_fixed_loss[t] = (outcomes[t] - avg_rep)**2
        mwu_weighted_rep = 1.0 * sum([mwu_probs[i] * experts_reports[i][t]
                                      for i in range(num_experts)])
        mwu_weighted_loss[t] = (outcomes[t] - mwu_weighted_rep)**2
        # weight update
        temp = [mwu_weights[i] * (1.0 - eta * experts_loss_lst[t][i])
                for i in range(num_experts)]
        mwu_weights = temp
        # probs update
        mwu_probs = [1.0 * mwu_weights[i] / sum(mwu_weights)
                     for i in range(num_experts)]
        (regr_best, bf) = regret(experts_loss_lst, num_experts, mwu_loss, t)
        mwu_rep_regr.append(regr_best)
    return (sample_id, num_experts, mwu_rep_regr,
            [sum(mwu_loss[:t + 1]) for t in range(T)],
            [sum(mwu_weighted_loss[:t + 1]) for t in range(T)],
            [sum(uniform_fixed_loss[:t + 1]) for t in range(T)])

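# Hedged sketch (assumption): the regret(...) helper used by the forecasting experiments above
# (Hedge, WSU-UX, ELF, WSWM, EXP3, MWU) is not shown. Given how its return value is unpacked,
# it plausibly returns the regret against the best fixed expert in hindsight together with
# that expert's cumulative loss:
def regret_experts_sketch(experts_loss_lst, num_experts, algo_loss, t):
    """Return (cumulative algorithm loss - best fixed expert loss, best fixed expert loss)."""
    cum_expert = [sum(experts_loss_lst[s][i] for s in range(t + 1)) for i in range(num_experts)]
    best_fixed = min(cum_expert)
    return (sum(algo_loss[:t + 1]) - best_fixed, best_fixed)
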
def main_winexp(bidder, curr_rep, T, num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values, bids, threshold, noise, num_adaptive):
    algo_util = []
    temp_regr = []
    clean_alloc = [[] for _ in range(0, T)]
    for t in range(0, T):
        bid_chosen = [bidder[i].bidding() for i in range(0, num_adaptive)]
        for i in range(0, num_adaptive):
            bids[t][i] = bid_chosen[i]
        bid_vec = deepcopy(bids[t])
        gsp_instance = GSP(ctr[t], reserve[t], bid_vec, rank_scores[t], num_slots, num_bidders)
        # this is not reported to the bidder, and thus is cleaned of noise
        for i in range(0, num_adaptive):
            allocated = gsp_instance.alloc_func(bidder[i].id, bids[t][bidder[i].id])
            temp = [gsp_instance.alloc_func(bidder[i].id, bid * bidder[i].eps)
                    for bid in range(0, bidder[i].bid_space)]
            if (i == 0):
                clean_alloc[t] = deepcopy(temp)
            # bidder sees noisy data as his allocation
            noise_cp = deepcopy(noise)
            bidder[i].alloc_func[t] = noise_mask(temp, noise_cp[t], ctr[t], num_slots)
            # reward function: value - payment (coming from the GSP module)
            bidder[i].pay_func[t] = [gsp_instance.pay_func(bidder[i].id, bid * bidder[i].eps)
                                     for bid in range(0, bidder[i].bid_space)]

            #### WIN-EXP computations ####
            # computation of reward will only be used for the regret
            if (i == 0):
                # bidder 0 runs WIN-EXP
                if allocated > threshold[t]:
                    bidder[i].reward_func[t] = [(values[t][0] - bidder[i].pay_func[t][b])
                                                for b in range(0, bidder[i].bid_space)]
                    bidder[i].utility[t] = bidder[i].compute_utility(1, bidder[i].reward_func[t],
                                                                     bidder[i].alloc_func[t])
                else:
                    bidder[i].reward_func[t] = [0 for _ in range(0, bidder[i].bid_space)]
                    bidder[i].utility[t] = bidder[i].compute_utility(0, bidder[i].reward_func[t],
                                                                     bidder[i].alloc_func[t])
                (bidder[i].weights, bidder[i].pi) = bidder[i].weights_update_winexp(bidder[i].eta_winexp,
                                                                                    bidder[i].utility[t])
            else:
                # the remaining adaptive bidders run EXP3
                if allocated > threshold[t]:
                    bidder[i].reward_func[t] = [(values[t][i] - bidder[i].pay_func[t][b])
                                                for b in range(0, bidder[i].bid_space)]
                else:
                    bidder[i].reward_func[t] = [0 for _ in range(0, bidder[i].bid_space)]
                bidder[i].utility[t] = bidder[i].reward_func[t]

                # weights update
                arm_chosen = int(math.ceil(bids[t][i] / bidder[i].eps))
                if bidder[i].pi[arm_chosen] < np.exp(-700):
                    bidder[i].pi[arm_chosen] = np.exp(-700)
                estimated_loss = -bidder[i].utility[t][arm_chosen] / bidder[i].pi[arm_chosen]
                bidder[i].loss[arm_chosen] += estimated_loss
                arr = np.array([(-bidder[i].eta_exp3) * bidder[i].loss[b]
                                for b in range(0, bidder[i].bid_space)],
                               dtype=np.float128)
                bidder[i].weights = np.exp(arr)
                bidder[i].pi = [bidder[i].weights[b] / sum(bidder[i].weights)
                                for b in range(0, bidder[i].bid_space)]
        # for each auction (at the same t) you choose the same arm
        arm = int(math.ceil(bids[t][0] / bidder[0].eps))
        algo_util.append(bidder[0].reward_func[t][arm] * clean_alloc[t][arm])
        temp_regr.append(regret(bidder[0].reward_func, clean_alloc, bidder[0].bid_space, algo_util, t))
    return temp_regr

def main_gexp3(bidder, curr_rep, T, num_bidders, num_slots, outcome_space, rank_scores, ctr, reserve, values, bids, num_auctions):
    algo_util = []
    temp_regr = []
    gamma = 1.05 * math.sqrt(bidder.bid_space * math.log(bidder.bid_space, 2) / T)
    for t in range(0, T):
        #bid_chosen = round(bidder.gbidding(bidder.weights, gamma), 2)
        bid_chosen = round(bidder.bidding(), 2)
        # the bid chosen by the learner is the same for all auctions in the batch
        for auction in range(0, num_auctions):
            bids[auction][t][0] = bid_chosen
            allocated = GSP(ctr[auction][t], reserve[auction][t], bids[auction][t],
                            rank_scores[auction][t], num_slots,
                            num_bidders).alloc_func(bidder.id, bids[auction][t][bidder.id])
            bidder.alloc_func[auction][t] = [
                GSP(ctr[auction][t], reserve[auction][t], bids[auction][t],
                    rank_scores[auction][t], num_slots,
                    num_bidders).alloc_func(bidder.id, bid * bidder.eps)
                for bid in range(0, bidder.bid_space)
            ]
            # reward function: value - payment (coming from the GSP module)
            bid_vec = deepcopy(bids[auction][t])
            bidder.pay_func[t] = [
                GSP(ctr[auction][t], reserve[auction][t], bid_vec,
                    rank_scores[auction][t], num_slots,
                    num_bidders).pay_func(bidder.id, bid * bidder.eps)
                for bid in range(0, bidder.bid_space)
            ]
            bidder.reward_func[auction][t] = compute_reward(bidder.alloc_func[auction][t],
                                                            bidder.pay_func[t],
                                                            ctr[auction][t],
                                                            values[auction][t])

        #### EXP3 computations ####
        # per-bid utility averaged over the batch of auctions at round t (shifted by -1)
        u_s = [0 for _ in range(0, bidder.bid_space)]
        for b in range(0, bidder.bid_space):
            for auction in range(0, num_auctions):
                u_s[b] += bidder.reward_func[auction][t][b] * bidder.alloc_func[auction][t][b]
            bidder.avg_utility[t].append(u_s[b] / num_auctions - 1)

        # weights update
        arm_chosen = int(math.ceil(bids[0][t][0] / bidder.eps))
        if bidder.pi[arm_chosen] < 0.0000000001:
            bidder.pi[arm_chosen] = 0.0000000001
        #estimated_loss = bidder.avg_utility[t][arm_chosen] / bidder.pi[arm_chosen]
        estimated_loss = (bidder.avg_utility[t][arm_chosen] - bidder.beta) / bidder.pi[arm_chosen]
        bidder.loss[arm_chosen] += estimated_loss
        bidder.weights = [math.exp(-bidder.eta_gexp3 * bidder.loss[b])
                          for b in range(0, bidder.bid_space)]
        # mix the exponential weights with uniform exploration (gamma)
        bidder.pi = [(1 - gamma) * bidder.weights[b] / sum(bidder.weights) + gamma / bidder.bid_space
                     for b in range(0, bidder.bid_space)]

        # compute the algorithm's utility at every step
        algo_util.append(bidder.avg_utility[t][int(math.ceil(bids[0][t][0] / bidder.eps))])
        temp_regr.append(regret(0, bidder.reward_func, bidder.alloc_func, bidder.bid_space,
                                algo_util, t, num_auctions))
    return temp_regr

def main_dgrind(regress, principal, agents, oracle, resp_lst, curr_rep, T, num_agents, d):
    temp_regr = []
    algo_loss = []
    actions_taken = []
    updated = []
    updated_curr = [0] * principal.calA_size
    print("runner dgrind repetition: %d" % curr_rep)
    for t in range(T):
        print("Timestep t=%d" % t)
        cp_probs = deepcopy(principal.pi)
        (a_t, arm_chosen) = principal.choose_action()
        resp = agents[t].response(a_t, d)
        principal.loss_func[t] = oracle.compute_loss(resp_lst, t)
        if (not regress):
            in_probs = oracle.compute_in_probs(cp_probs, d, resp_lst, t)
        else:
            # no omnipotent oracle, only a regression one is available
            actions_taken.append(principal.calA[arm_chosen])  # list containing all actions taken by the principal
            updated_curr[arm_chosen] = 1  # flag for the current arm
            tot = 0.0
            incl = [0] * principal.calA_size  # unclear what it does at the moment
            for i in range(principal.calA_size):  # iterate over all of the principal's actions
                a = principal.calA[i]
                dist = 1.0 * np.dot(a, resp) / np.linalg.norm(a[:d])  # distance of the current point from action a
                if (i != arm_chosen):
                    if (np.abs(dist) >= 2 * agents[t].delta):
                        updated_curr[i] = 1
                        tot += principal.pi[i]  # builds the dataset for the logistic regression
                        incl[i] = 1
                    else:
                        updated_curr[i] = 0
                else:
                    if np.abs(dist) >= 2 * agents[t].delta:
                        tot += principal.pi[i]
                        incl[i] = 1
            updated.append(updated_curr)
            in_probs = oracle.compute_in_probs_regr(cp_probs, d, t, updated, actions_taken, tot, incl)
        estimated_loss = (1.0 * principal.loss_func[t][arm_chosen]) / in_probs[arm_chosen]
        principal.est_loss[arm_chosen] += estimated_loss
        for i in range(principal.calA_size):
            a = principal.calA[i]
            if (i != arm_chosen):
                dist = 1.0 * np.dot(a, resp) / np.linalg.norm(a[:d])
                if np.abs(dist) >= 2 * agents[t].delta:
                    if (np.sign(dist * agents[t].label) == -1):
                        principal.est_loss[i] += (1.0) / in_probs[i]
                    else:
                        principal.est_loss[i] += (0.0) / in_probs[i]
        # eta depends on whether we have access to the omnipotent oracle or only the regression one
        if (not regress):
            eta = principal.eta_dgrind
        else:
            eta = principal.eta_dgrind_regress
        arr = np.array([(-eta) * principal.est_loss[i] for i in range(principal.calA_size)],
                       dtype=np.float128)
        principal.weights = np.exp(arr)
        principal.pi = [principal.weights[i] / sum(principal.weights)
                        for i in range(principal.calA_size)]
        # prevent division by almost 0
        for j in range(principal.calA_size):
            if (principal.pi[j] < 0.00000001):
                principal.pi[j] = 0.00000001
        # cumulative loss for the actions played
        algo_loss.append(principal.loss_func[t][arm_chosen])
        temp_regr.append(regret(principal.loss_func, principal.calA, algo_loss, t))
    return temp_regr