# Monte-Carlo comparison of the scripted bidder (bidders[0]) against the MDP
# learner over num_trials freshly drawn valuation vectors.
# Assumes num_trials, num_rounds, bidders, learner, b0, lb0 and lb10 are
# defined earlier in the file — TODO confirm against the full script.
lb11 = [0] * num_trials  # learner's round-2 bid after having won round 1
val0 = [0] * num_trials  # first-round valuation drawn in each trial
val1 = [0] * num_trials  # second-round valuation (filled only when num_rounds > 1)
for r in range(num_trials):
    # Draw a fresh valuation vector and copy it to the learner so both
    # agents bid on identical private values this trial.
    bidders[0].valuations = bidders[0].make_valuations()
    learner.valuations = bidders[0].valuations
    val0[r] = bidders[0].valuations[0]
    if num_rounds > 1:
        val1[r] = bidders[0].valuations[1]
    # Full expected-reward computation only on the first trial; on later
    # trials only the terminal-state rewards are refreshed before re-solving.
    if r == 0:
        learner.calc_expected_rewards()
    else:
        learner.calc_terminal_state_rewards()
    learner.solve_mdp()
    b0[r] = bidders[0].place_bid(1)   # scripted bidder's first-round bid
    lb0[r] = learner.place_bid(1)     # learner's first-round bid
    if num_rounds > 1:
        lb10[r] = learner.place_bid(2)  # round-2 bid with num_goods_won == 0
        learner.num_goods_won = 1
        lb11[r] = learner.place_bid(2)  # round-2 bid after one win
        learner.num_goods_won = 0       # reset learner state for the next trial
    if not(learner.is_bidding_valuation_in_final_round()):
        truthful_result_output = 'Does not bid truthfully in last round.'
    else:
        truthful_result_output = 'Truthful bidding in last round'
    print(learner.valuations, round(b0[r], learner.digit_precision), lb0[r], lb10[r], lb11[r], truthful_result_output)
# NOTE(review): the triple-quoted string below opens a commented-out plotting
# fragment; its closing delimiter is not visible in this chunk — confirm it is
# terminated later in the file, otherwise the following code is swallowed.
"""
plt.figure() for s in learner.price_prediction.keys(): if len(s) == 2:
# Enumerate every possible valuation vector and record the learner's optimal
# policy and expected payment for each, printing the resulting bids, the
# Q-values of every non-terminal state, and the terminal-state values.
policies = {}
exp_payment = {}
print('See how learner bids')
for idx, valuation in enumerate(itertools.product(possible_types, repeat=num_rounds)):
    learner.valuations = valuation
    # Full expected-reward pass only for the very first vector; subsequent
    # vectors only need their terminal-state rewards refreshed.
    if idx == 0:
        learner.calc_expected_rewards()
    else:
        learner.calc_terminal_state_rewards()
    learner.solve_mdp()
    key = tuple(valuation)
    # Deep-copy so later solves cannot mutate the stored snapshots.
    policies[key] = copy.deepcopy(learner.pi)
    exp_payment[key] = copy.deepcopy(learner.exp_payment)
    print('Valuation vector =', valuation)
    print('First round bid =', learner.place_bid(1))
    for state in learner.state_space:
        if state in learner.terminal_states:
            continue
        Q = {action: learner.Q[(state, action)] for action in learner.action_space}
        print('State', state, '. Optimal Action =', learner.pi[state], '. Q of each action:', Q)
    print('Values at terminal states')
    for state in learner.terminal_states:
        print('State', state, '. V[s] =', learner.V[state])
    if learner.is_bidding_valuation_in_final_round():
        truthful_result_output = 'Truthful bidding in last round'
    else:
        truthful_result_output = 'Does not bid truthfully in last round.'
    print(truthful_result_output)