for ii, p in enumerate(generate_buyer_prices_paper(20)):
        # One noisy LinearBlackBoxBuyer per generated price p; the first
        # argument is 'b' followed by the enumeration index
        # (presumably the agent id -- TODO confirm against the constructor).
        buyers.append(LinearBlackBoxBuyer('b' + str(ii), p, noisy=True))

    # Every participant in one sequence: sellers first, then buyers.
    agents = sellers + buyers

    # Tell each agent that a new game is starting.
    for ag in agents:
        ag.new_game()

    # Rounds per game; kept separate from max_time (see the note on
    # max_time below).
    ROUNDS_PER_GAME = 10

    # Build the environment over the same sellers and buyers, pairing them
    # with a RandomMatcher configured with reward_on_reference=True.
    market_env = MarketEnvironment(
        sellers=sellers,
        buyers=buyers,
        max_time=10,  ## not the same as rounds per game!!
        matcher=RandomMatcher(reward_on_reference=True))
    # Keep whatever reset() returns as the initial observation.
    init_observation = market_env.reset()

    # Start at zero; presumably running averages over rounds, offers and
    # time -- TODO confirm where they are updated.
    round_avg = 0.
    offer_avg = 0.
    time_avg = 0.

    # Open a new, empty entry for this game in the demand and reward records
    # and log the reservation price of seller_agent.
    records['demands'].append([])
    records['rewards'].append([])
    records['prices'].append(seller_agent.reservation_price)

    # Play the rounds of one game. The bound is the ROUNDS_PER_GAME constant
    # defined above instead of a repeated literal, so the two cannot drift.
    for n_round in range(ROUNDS_PER_GAME):

        # Reset the environment for this round and keep what reset() returns.
        init_observation = market_env.reset()
        # Per-round bookkeeping keyed by agent_id: no observation recorded
        # yet and no agent marked as done.
        observations = {k.agent_id: None for k in agents}
        done = {k.agent_id: False for k in agents}
        # Collected during the round; starts empty.
        reward_hist = []
# Example #2
# 0
        # Coefficient vector: 0.05, 0.95, then (size_coefs - 2) zeros
        # (no padding when size_coefs <= 2).
        agent.coefs = np.array([0.05, 0.95] + [0]*(size_coefs - 2))

    # Clear per-agent state before playing: sellers first, then buyers.
    # Each agent gets a zero reward and its own fresh, empty observations dict.
    for group in (sellers, buyers):
        for agent in group:
            agent.reward, agent.observations = 0.0, {}

    # Play n_round rounds; r is the zero-based round index.
    for r in range(n_round):
        print("ROUND", r, '-----------------------------------------------')

        # Reset the market environment at the start of every round.
        market_env.reset()

        # Draw a random opening offer for every agent, keyed by agent_id:
        # sellers from a normal distribution with mean 200 and std. dev. 5,
        # buyers from one with mean 100 and std. dev. 5.
        current_offers = {}
        for agent in sellers:
            current_offers[agent.agent_id] = np.random.normal(200, 5)
        for agent in buyers:
            current_offers[agent.agent_id] = np.random.normal(100, 5)

        # Loop over time steps; i is the step counter printed for progress.
        # NOTE(review): `is False` is an identity test, so the loop body runs
        # only while if_round_done is exactly the built-in False object; a
        # falsy non-bool (None, 0, numpy.bool_) would skip the loop -- confirm
        # the attribute is always a plain bool.
        i = 0
        while market_env.if_round_done is False:
            print(i, '-------')
            i += 1
            # Hand the current offers to the environment for this time step.
            market_env.step(current_offers)