# --- Game setup (script fragment): assemble agents, build the market, ---
# --- and initialize per-game bookkeeping. `sellers`, `seller_agent`,   ---
# --- `records`, and the helper functions are defined elsewhere.        ---
sellers.append(seller_agent)

# Build the buyer population from the paper's reference price schedule;
# each buyer is a noisy linear black-box agent named 'b0', 'b1', ...
buyers = []
for ii, p in enumerate(generate_buyer_prices_paper(20)):
    buyers.append(LinearBlackBoxBuyer('b' + str(ii), p, noisy=True))

agents = sellers + buyers
# Let every agent clear its per-game internal state before play starts.
for ag in agents:
    ag.new_game()

ROUNDS_PER_GAME = 10

market_env = MarketEnvironment(
    sellers=sellers,
    buyers=buyers,
    max_time=10,  ## not the same as rounds per game!!
    matcher=RandomMatcher(reward_on_reference=True))
init_observation = market_env.reset()

# Running statistics accumulated over the rounds of this game.
round_avg = 0.
offer_avg = 0.
time_avg = 0.

# Open fresh per-game record lists; `records` is a dict created elsewhere.
records['demands'].append([])
records['rewards'].append([])
records['prices'].append(seller_agent.reservation_price)

# NOTE(review): the literal 10 here presumably should be ROUNDS_PER_GAME — confirm.
for n_round in range(10):
    init_observation = market_env.reset()
    # NOTE(review): the loop body continues beyond this chunk of the file.
# For plotting fig, ax = plt.subplots(figsize=(8, 8), tight_layout=True) ax.set_xlim(95, 205) # Loop over games for g in range(n_game): print("GAME", g, '=================================================================================================================') # Define parameters of each round max_time = 30 matcher = RandomMatcher(reward_on_reference=False) # Create market environment market_env = MarketEnvironment(sellers=sellers, buyers=buyers, max_time=max_time, matcher=matcher) # HERE AGENTS LEARN AND ADJUST THEIR COEFS (for now the are constant) for agent in sellers: size_coefs = agent.determine_size_of_coefs(n_sellers=n_sellers) agent.coefs = np.array([0.05, 0.95] + [0]*(size_coefs - 2)) for agent in buyers: size_coefs = agent.determine_size_of_coefs(n_buyers=n_buyers) agent.coefs = np.array([0.05, 0.95] + [0]*(size_coefs - 2)) # Reset agents' rewards and observations for agent in sellers: agent.reward = 0.0 agent.observations = {} for agent in buyers: agent.reward = 0.0