예제 #1
0
            Q[a] = learner.Q[(s, a)]
        print('State', s, '. Optimal Action =', learner.pi[s], '. Q of each action:', Q)
    print('Values at terminal states')
    for s in learner.terminal_states:
        print('State', s, '. V[s] =', learner.V[s])
    if not (learner.is_bidding_valuation_in_final_round()):
        truthful_result_output = 'Does not bid truthfully in last round.'
    else:
        truthful_result_output = 'Truthful bidding in last round'
    print(truthful_result_output)

# Baseline measurement: for each trial, run the auction among the original
# `bidders` and record the summed utility of bidders[1].
print('Run Learner and see how it does')
num_trials = 1000

sa = SequentialAuction(bidders, num_rounds)
util_simple = []
for t in range(num_trials):
    # Reset every bidder and assign it newly generated valuations before the run.
    for bidder in bidders:
        bidder.reset()
        bidder.valuations = bidder.make_valuations()
    sa.run()
    # One entry per trial, appended in trial order.
    util_simple.append(sum(bidders[1].utility))

# Second configuration: the auction is rebuilt with only bidders[0] and the
# learner as participants, over the same number of rounds.
sa = SequentialAuction([bidders[0], learner], num_rounds)
# Pre-sized per-trial result list; -1 is the placeholder each slot holds until
# it is overwritten.
util_learner = [-1] * num_trials
for t in range(num_trials):
    # Every entry of `bidders` is reset and given newly generated valuations,
    # including those that are not part of this two-player auction.
    for bidder in bidders:
        bidder.reset()
        bidder.valuations = bidder.make_valuations()
    # The learner is only reset here; its valuations are not regenerated.
    learner.reset()
    # NOTE(review): within the visible lines this loop never calls sa.run()
    # and never writes util_learner[t] -- the snippet looks cut off at this
    # point; confirm against the full file.
# Disabled discrete-distribution run: the triple-quoted string below is a bare
# expression statement, so none of the code inside it executes.
"""
print("----------")
print("Discrete Distribution")
type_dist = [1.0 / len(possible_types)] * len(possible_types)
# Generate bidders
bidders_disc = [WeberBidder(i, num_rounds, num_bidders, possible_types, type_dist, True)
                for i in range(num_bidders)]
# Run auction
auction = SequentialAuction(bidders_disc, num_rounds)
auction.run()
auction.print_summary()
auction.print_round_overview()
auction.print_bidder_results()
"""

# Continuous-distribution run: build one WeberBidder per index, run a single
# sequential auction with them, and print the three result reports.
print("----------")
print("Continuous Distribution")

# One weight of 1.0 for each entry in possible_types.
type_dist = [1.0 for _ in range(len(possible_types))]

# The final positional argument is False here.
# NOTE(review): presumably this flag selects the continuous variant -- confirm
# against WeberBidder's signature.
bidders_cont = [
    WeberBidder(bidder_id, num_rounds, num_bidders, possible_types, type_dist, False)
    for bidder_id in range(num_bidders)
]

# Disabled: copy each discrete bidder's valuations onto the matching
# continuous bidder (requires bidders_disc to exist).
#for i in range(num_bidders):
#    bidders_cont[i].valuations = bidders_disc[i].valuations

# Run the auction, then emit the summary, per-round and per-bidder reports.
auction = SequentialAuction(bidders_cont, num_rounds)
auction.run()
auction.print_summary()
auction.print_round_overview()
auction.print_bidder_results()
예제 #3
0
    def learn_auction_parameters(self, bidders, num_mc=1000):
        """
        Estimate auction statistics by simulating sequential auctions.

        Runs ``num_mc`` auctions among ``bidders`` and observes the last bidder
        in the list: in every round its bid (rounded to 2 decimals) is recorded
        as the action taken in the current state, and its win/loss result is
        passed to ``get_next_state`` to obtain the following state.

        ``state_space``, ``terminal_states``, ``action_space`` and
        ``prices_in_state`` are extended with what was observed (they must
        support ``add`` on entry) and are then replaced by sorted lists.
        The method also sets:

        * ``num_price_samples`` -- length of the sorted ``prices_in_state``.
        * ``exp_payment[(s, a)]`` -- sum of the negated highest rival bid over
          the winning visits to ``(s, a)``, divided by all visits to ``(s, a)``.
        * ``exp_T[(s, a, s_)]`` -- fraction of visits to ``(s, a)`` that were
          followed by ``s_``.  ``T`` is bound to the same dict.
        * ``prob_win[(s, a)]`` -- fraction of visits to ``(s, a)`` that won.

        Finally ``perform_price_prediction`` is called with the per-state lists
        of highest rival bids.

        :param bidders: List.  Bidders to learn from; the last one is observed.
        :param num_mc: Integer.  Number of auctions to simulate.
        """
        # Running totals keyed by (state, action) or (state, action, next state).
        # Float-valued so the divisions below are true divisions.
        payment_sum = defaultdict(float)
        win_count = defaultdict(float)
        sa_counter = defaultdict(float)
        sas_counter = defaultdict(float)
        highest_other_bid = defaultdict(list)

        sa = SequentialAuction(bidders, self.num_rounds)
        initial_state = (0, 0)
        self.state_space.add(initial_state)
        for _ in range(num_mc):
            # Refresh bidders
            for bidder in bidders:
                bidder.valuations = bidder.make_valuations()
                bidder.reset()
            # Run auction and learn results of nth bidder
            sa.run()
            s = initial_state
            for j in range(self.num_rounds):
                round_bids = sa.bids[j]
                # Rounding to 2 decimals buckets bids so that repeated
                # (state, action) pairs can be counted.
                largest_bid_amongst_n_minus_1 = round(max(round_bids[:-1]), 2)
                highest_other_bid[s].append(largest_bid_amongst_n_minus_1)
                # The last entry of the round's bids is used as the action.
                # NOTE(review): assumes sa.bids[j] is ordered like `bidders` -- confirm.
                a = round(round_bids[-1], 2)
                self.action_space.add(a)
                sa_counter[(s, a)] += 1
                won_this_round = bidders[-1].win[j]
                if won_this_round:
                    win_count[(s, a)] += 1
                    # Stored negated; losing visits contribute nothing to the sum.
                    payment_sum[(s, a)] -= largest_bid_amongst_n_minus_1
                    self.prices_in_state.add(round(sa.payments[j], 2))
                s_ = self.get_next_state(s, won_this_round)
                self.state_space.add(s_)
                sas_counter[(s, a, s_)] += 1
                s = s_
            # Whatever state the auction ended in is terminal.  It is already
            # part of state_space (either the initial state or added above).
            self.terminal_states.add(s)

        # Turn these into sorted lists, so that access is ordered and predictable.
        self.state_space = sorted(self.state_space)
        self.terminal_states = sorted(self.terminal_states)
        self.action_space = sorted(self.action_space)
        self.prices_in_state = sorted(self.prices_in_state)
        self.num_price_samples = len(self.prices_in_state)

        # Normalise the running totals by the number of visits to each (s, a).
        self.exp_payment = {key: payment_sum[key] / visits
                            for key, visits in sa_counter.items()}

        self.exp_T = {(s, a, s_): count / sa_counter[(s, a)]
                      for (s, a, s_), count in sas_counter.items()}

        self.prob_win = {key: win_count[key] / visits
                         for key, visits in sa_counter.items()}

        self.perform_price_prediction(highest_other_bid)
        # The empirical transition frequencies are used directly as T
        # (self.calc_transition_matrix() is not called).
        self.T = self.exp_T