Q[a] = learner.Q[(s, a)] print('State', s, '. Optimal Action =', learner.pi[s], '. Q of each action:', Q) print('Values at terminal states') for s in learner.terminal_states: print('State', s, '. V[s] =', learner.V[s]) if not (learner.is_bidding_valuation_in_final_round()): truthful_result_output = 'Does not bid truthfully in last round.' else: truthful_result_output = 'Truthful bidding in last round' print(truthful_result_output) # Compare utility of simple vs learned print('Run Learner and see how it does') num_trials = 1000 sa = SequentialAuction(bidders, num_rounds) util_simple = [-1] * num_trials for t in range(num_trials): for bidder in bidders: bidder.reset() bidder.valuations = bidder.make_valuations() sa.run() util_simple[t] = sum(bidders[1].utility) sa = SequentialAuction([bidders[0], learner], num_rounds) util_learner = [-1] * num_trials for t in range(num_trials): for bidder in bidders: bidder.reset() bidder.valuations = bidder.make_valuations() learner.reset()
""" print("----------") print("Discrete Distribution") type_dist = [1.0 / len(possible_types)] * len(possible_types) # Generate bidders bidders_disc = [WeberBidder(i, num_rounds, num_bidders, possible_types, type_dist, True) for i in range(num_bidders)] # Run auction auction = SequentialAuction(bidders_disc, num_rounds) auction.run() auction.print_summary() auction.print_round_overview() auction.print_bidder_results() """ # Continuous type distribution print("----------") print("Continuous Distribution") type_dist = [1.0] * len(possible_types) # Generate bidders bidders_cont = [WeberBidder(i, num_rounds, num_bidders, possible_types, type_dist, False) for i in range(num_bidders)] #for i in range(num_bidders): # bidders_cont[i].valuations = bidders_disc[i].valuations # Run auction auction = SequentialAuction(bidders_cont, num_rounds) auction.run() auction.print_summary() auction.print_round_overview() auction.print_bidder_results()
def learn_auction_parameters(self, bidders, num_mc=1000):
    """
    Learn the highest bid of n - 1 bidders and the probability of winning.

    Simulates ``num_mc`` sequential auctions among ``bidders`` and gathers
    empirical statistics from the point of view of the last bidder in the
    list.  In every round the last bidder's bid (rounded to 2 decimals) is
    treated as the action taken in the current state.

    Attributes written on ``self``:

    * ``state_space``, ``terminal_states``, ``action_space`` and
      ``prices_in_state`` are extended with everything observed and then
      replaced by sorted lists.
    * ``num_price_samples``: number of distinct prices in ``prices_in_state``.
    * ``exp_payment[(s, a)]``: average over all visits of ``(s, a)`` of the
      negated highest competing bid when the round was won (0 when lost).
    * ``exp_T[(s, a, s_)]`` and ``T``: empirical frequency of reaching ``s_``
      after taking ``a`` in ``s``.
    * ``prob_win[(s, a)]``: empirical frequency of winning with ``a`` in ``s``.

    Finally ``self.perform_price_prediction`` is called with the highest
    competing bids observed in each state.

    NOTE: the four collections above are updated through ``.add`` and are
    lists once this method returns, so calling it a second time fails unless
    they are turned back into sets first.

    :param bidders: List.  Bidders to learn from.  The last one is the bidder
        whose bids and wins are recorded.
    :param num_mc: Integer.  Number of auctions to simulate.
    """
    # Float-valued counters keep the ratios below true divisions.
    win_count = defaultdict(float)
    payment_total = defaultdict(float)
    sa_counter = defaultdict(float)
    sas_counter = defaultdict(float)
    highest_other_bid = defaultdict(list)

    initial_state = (0, 0)
    sa = SequentialAuction(bidders, self.num_rounds)
    self.state_space.add(initial_state)
    for _ in range(num_mc):
        # Refresh bidders
        for bidder in bidders:
            bidder.valuations = bidder.make_valuations()
            bidder.reset()

        # Run auction and learn results of nth bidder
        sa.run()
        s_ = initial_state
        for j in range(self.num_rounds):
            s = s_
            round_bids = sa.bids[j]
            largest_bid_amongst_n_minus_1 = round(max(round_bids[:-1]), 2)
            highest_other_bid[s].append(largest_bid_amongst_n_minus_1)

            # The action is the Nth bidder's own (rounded) bid.
            a = round(round_bids[-1], 2)
            self.action_space.add(a)
            sa_counter[(s, a)] += 1

            won_this_round = bidders[-1].win[j]
            if won_this_round:
                win_count[(s, a)] += 1
                # Payments are accumulated as negative amounts.
                payment_total[(s, a)] -= largest_bid_amongst_n_minus_1
                self.prices_in_state.add(round(sa.payments[j], 2))

            s_ = self.get_next_state(s, won_this_round)
            self.state_space.add(s_)
            sas_counter[(s, a, s_)] += 1

        # The state reached after the final round is terminal.  It is
        # already part of state_space: added inside the loop above, or it
        # is the initial state when there are no rounds.
        self.terminal_states.add(s_)

    # Turn these into sorted lists, so that access is ordered and predictable.
    self.state_space = sorted(self.state_space)
    self.terminal_states = sorted(self.terminal_states)
    self.action_space = sorted(self.action_space)
    self.prices_in_state = sorted(self.prices_in_state)
    self.num_price_samples = len(self.prices_in_state)

    # Normalise the accumulated totals by the visit count of each (s, a).
    self.exp_payment = {key: payment_total[key] / visits
                        for key, visits in sa_counter.items()}
    self.exp_T = {(s, a, s_): count / sa_counter[(s, a)]
                  for (s, a, s_), count in sas_counter.items()}
    self.prob_win = {key: win_count[key] / visits
                     for key, visits in sa_counter.items()}

    self.perform_price_prediction(highest_other_bid)
    # The empirical transition frequencies are used directly as the
    # transition model; calc_transition_matrix() is not invoked.
    self.T = self.exp_T