# Per-trial records, one slot per trial:
#   lb11 - learner's round-2 bid when num_goods_won is forced to 1
#   val0 - first entry of the first bidder's valuation vector
#   val1 - second entry of that vector (only filled when num_rounds > 1)
lb11 = [0] * num_trials
val0 = [0] * num_trials
val1 = [0] * num_trials
for r in range(num_trials):
    # Draw new valuations for the first bidder and give the learner the same ones.
    bidders[0].valuations = bidders[0].make_valuations()
    trial_valuations = bidders[0].valuations
    learner.valuations = trial_valuations
    val0[r] = trial_valuations[0]
    if num_rounds > 1:
        val1[r] = trial_valuations[1]

    # Only the first trial runs calc_expected_rewards(); every later trial
    # calls calc_terminal_state_rewards() instead.
    # NOTE(review): presumably only the terminal-state rewards depend on the
    # valuation draw - confirm against the learner implementation.
    if r != 0:
        learner.calc_terminal_state_rewards()
    else:
        learner.calc_expected_rewards()
    learner.solve_mdp()

    # First-round bids of the first bidder and of the learner.
    b0[r] = bidders[0].place_bid(1)
    lb0[r] = learner.place_bid(1)
    if num_rounds > 1:
        # Query the round-2 bid twice: once with the learner's current state,
        # then with num_goods_won temporarily set to 1 (reset to 0 afterwards).
        lb10[r] = learner.place_bid(2)
        learner.num_goods_won = 1
        lb11[r] = learner.place_bid(2)
        learner.num_goods_won = 0

    truthful_result_output = (
        'Truthful bidding in last round'
        if learner.is_bidding_valuation_in_final_round()
        else 'Does not bid truthfully in last round.'
    )
    print(
        learner.valuations,
        round(b0[r], learner.digit_precision),
        lb0[r],
        lb10[r],
        lb11[r],
        truthful_result_output,
    )

"""
plt.figure()
for s in learner.price_prediction.keys():
    if len(s) == 2:
Esempio n. 2
0
# Solve the learner's MDP for every valuation vector in
# possible_types ** num_rounds, keeping a snapshot of the resulting policy
# and expected payment per vector and printing a diagnostic dump.
policies = {}
exp_payment = {}
print('See how learner bids')
calc_rewards = True
for v in itertools.product(possible_types, repeat=num_rounds):
    learner.valuations = v

    # calc_expected_rewards() runs for the first vector only; afterwards
    # calc_terminal_state_rewards() is called instead.
    if not calc_rewards:
        learner.calc_terminal_state_rewards()
    else:
        learner.calc_expected_rewards()
        calc_rewards = False
    learner.solve_mdp()

    # Deep copies, so later solves cannot alter the stored snapshots.
    valuation_key = tuple(v)
    policies[valuation_key] = copy.deepcopy(learner.pi)
    exp_payment[valuation_key] = copy.deepcopy(learner.exp_payment)

    print('Valuation vector =', v)
    print('First round bid =', learner.place_bid(1))

    # For each non-terminal state, show the chosen action next to the
    # Q-value of every available action.
    for s in learner.state_space:
        Q = {}
        if s not in learner.terminal_states:
            for a in learner.action_space:
                Q[a] = learner.Q[(s, a)]
            print('State', s, '. Optimal Action =', learner.pi[s], '. Q of each action:', Q)

    print('Values at terminal states')
    for s in learner.terminal_states:
        print('State', s, '. V[s] =', learner.V[s])

    truthful_result_output = (
        'Truthful bidding in last round'
        if learner.is_bidding_valuation_in_final_round()
        else 'Does not bid truthfully in last round.'
    )
    print(truthful_result_output)