# # Run the training algorithm

# In[9]:

# Restore the agent's networks from the saved checkpoint. The checkpoint is
# indexed by network name, and each entry is passed to the load_state_dict()
# of the seller_agent attribute with the same name.
# NOTE(review): depending on the torch version, torch.load may unpickle
# arbitrary objects -- only load checkpoints from a trusted source.
mdict = torch.load('results/models2')
for _net_name in ('actor', 'actor_target', 'critic', 'critic_target'):
    getattr(seller_agent, _net_name).load_state_dict(mdict[_net_name])

# In[10]:

for e in range(epochs):
    # Assign seller_agent a newly generated reservation price for this epoch.
    seller_agent.reservation_price = generate_seller_prices_paper(1)[0]

    # One noisy black-box seller per generated price ('s0', 's1', ...),
    # followed by seller_agent as the last entry of the list.
    sellers = [
        LinearBlackBoxSeller('s' + str(ii), p, noisy=True)
        for ii, p in enumerate(generate_seller_prices_paper(19))
    ]
    sellers.append(seller_agent)

    # One noisy black-box buyer per generated price ('b0', 'b1', ...).
    buyers = [
        LinearBlackBoxBuyer('b' + str(ii), p, noisy=True)
        for ii, p in enumerate(generate_buyer_prices_paper(20))
    ]

    # Sellers first, then buyers.
    agents = [*sellers, *buyers]

    # Call new_game() on every participant, in list order.
    for ag in agents:
        ag.new_game()
import time
import warnings
# pandas setting warnings can be ignored, as the flagged behaviour is often
# intended. No category is given, so this "ignore" filter matches all warnings.
warnings.simplefilter("ignore")

# Timestamp taken before the setup below (presumably used later to report
# the elapsed run time).
start = time.time()

# Initial number of agents on each side of the market.
n_sellers = n_buyers = 100
# Number of games and number of rounds.
n_game, n_round = 1, 10

# Build the initial populations: every agent gets a 1-based display name and
# a generated reservation price. For now all sellers share one class and all
# buyers share one class.

# Sellers: 'Seller 1' ... 'Seller <n_sellers>'.
res_prices = generate_seller_prices_paper(discrete=False, count=n_sellers)
names = ['Seller ' + str(i + 1) for i in range(n_sellers)]
sellers = np.array([
    LinearBlackBoxSeller(
        agent_id=name,
        reservation_price=res_prices[i],
        noisy=True,
    )
    for i, name in enumerate(names)
])

# Buyers: 'Buyer 1' ... 'Buyer <n_buyers>'. res_prices and names are reused
# and hold the buyer values from here on.
res_prices = generate_buyer_prices_paper(discrete=False, count=n_buyers)
names = ['Buyer ' + str(i + 1) for i in range(n_buyers)]
buyers = np.array([
    LinearBlackBoxBuyer(
        agent_id=name,
        reservation_price=res_prices[i],
        noisy=True,
    )
    for i, name in enumerate(names)
])

# For plotting