Example #1
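The loop below advances two agents in turn through the same environment until either of them reaches a terminal state. At every step the agent's previous position is plotted as a grey square and its new position as a coloured dot (red for agent 1, blue for agent 2); the history-logging calls are left commented out.
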
while not terminated_1 and not terminated_2:

    # the first agent
    # print("agent 1")
    action_id = agent_1.action_based_on_policy(state_1, env)
    one_hot_action = one_hot(action_id, nr_actions)
    new_state, reward, terminated_1 = env.step(action_id, state_1)
    scaled_state_1 = scale_state(state_1, env)
    #histories_1.appending(reward, scaled_state_1, one_hot_action)
    plt.scatter(state_1[0, 0], state_1[0, 1], s=100, c='#C1C7C9', marker='s')
    plt.scatter(new_state[0, 0], new_state[0, 1], s=50, c='red')
    plt.pause(0.1)  # draws the updated figure; a blocking plt.show() here would stall the loop

    state_1, steps_1 = update_state_step(new_state, steps_1)

    # the second agent
    # print("agent 2")
    action_id = agent_2.action_based_on_policy(state_2, env)
    one_hot_action = one_hot(action_id, nr_actions)
    new_state, reward, terminated_2 = env.step(action_id, state_2)
    scaled_state_2 = scale_state(state_2, env)
    #histories_2.appending(reward, scaled_state_2, one_hot_action)
    plt.scatter(state_2[0, 0], state_2[0, 1], s=100, c='#C1C7C9', marker='s')
    plt.scatter(new_state[0, 0], new_state[0, 1], s=50, c='blue')
    plt.pause(0.1)  # draws the updated figure; a blocking plt.show() here would stall the loop

    state_2, steps_2 = update_state_step(new_state, steps_2)
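
The next excerpt comes from a different, more deeply nested part of the code: the data-collection phase of the Q-learning training routine. Actions are chosen epsilon-greedily from the target network, each transition is wrapped in an event and offered to the replay buffer, and once the episode terminates the agent performs K batch updates of the Q-network; the print statement at the lowest indentation level indicates that the Q-target is then refreshed in the enclosing loop.
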
        while not terminated:

            action_id = agent_ler.action_based_on_Q_target(state,
                                                           env,
                                                           epsilon=epsilon)

            new_state, reward, terminated, info = env.step(action_id)

            new_state = single_shape_adaptor(new_state, nr_features)

            this_event = event(state, action_id, reward, new_state, terminated,
                               env)

            replay_buffer.consider_this_event(this_event)

            state, steps = update_state_step(new_state, steps)

        print("...          " + str(steps) +
              " new event are added to the replay_buffer")

        print("...          updating the Q started")

        for k in range(K):

            current_batch = replay_buffer.return_a_batch(batchsize=64)
            agent_ler.learn(current_batch, env)

        print("...          updating the Q finished")

    print("...    the Q-target update is started.")