#           for i in range(num_bidders)]
# bidders = [WeberBidder(i, num_rounds, num_bidders, possible_types, type_dist, type_dist_disc)
#           for i in range(num_bidders)]
# Build the learning agent, then let it learn the auction parameters from the
# existing bidders (num_mc is forwarded unchanged to the learning call).
# NOTE(review): the first positional argument here is num_bidders, whereas the
# commented-out WeberBidder construction above passes the loop index i in that
# position -- presumably intentional, but confirm against the constructor.
learner = MDPBidderUAI(
    num_bidders, num_rounds, num_bidders,
    possible_types, type_dist, type_dist_disc,
)
learner.learn_auction_parameters(bidders, num_mc)

# Plot what the bidder has learned, for one fixed pair of valuations.
learner.valuations = [0.2, 0.1]
learner.calc_expected_rewards()
learner.solve_mdp()
# Each plotting helper takes the learner as its only argument; they are
# invoked in the order listed.
for _show in (
    plot_exp_payments,
    plot_transition,
    plot_prob_winning_and_transition,
    plot_Q_values,
    plot_price_pdf,
):
    _show(learner)
print(learner.place_bid(1))
# print(learner.place_bid(2))

# Compare learner to other agents
# For every type in learner.possible_types, bidders[0] and the learner are
# given the same valuations and the learner's MDP is solved again.
bidders[0].reset()
# One zero-initialised slot per possible type for each of the two agents.
# NOTE(review): nothing in the visible part of the loop writes to b0 / lb0 --
# presumably they are filled further down in the loop body; confirm.
b0 = [0] * len(bidders[0].possible_types)
lb0 = [0] * len(learner.possible_types)
for t_idx, t in enumerate(learner.possible_types):
    # Same valuations for both agents: t first, then half of t.
    bidders[0].valuations = [t, t / 2.0]
    learner.valuations = [t, t / 2.0]
    # Only the first iteration calls calc_expected_rewards(); every later
    # iteration calls calc_terminal_state_rewards() instead.
    # NOTE(review): presumably only the terminal-state rewards depend on the
    # valuations, so the full computation is needed once -- confirm in the
    # learner class.
    if t_idx == 0:
        learner.calc_expected_rewards()
    else:
        learner.calc_terminal_state_rewards()
    learner.solve_mdp()
    # b20 = learner.place_bid(2)