def test(ag1, ag2, render=False, load_model=False):
    """Run TEST_NUM evaluation episodes in CMOTP and return the mean episode length."""
    test_env = CMOTP()
    if load_model:
        ag1.load_model()
        ag2.load_model()
    ep_log = []
    for iii in range(TEST_NUM):
        print('test {}'.format(iii))
        state1, state2 = test_env.reset()
        ep_len = 0
        while True:
            if render:
                test_env.render()
                time.sleep(1)
            # choose an action for each agent (the trailing 0, 0.0 arguments presumably disable exploration)
            action1 = ag1.choose_action(state1, 0, 0.0)
            action2 = ag2.choose_action(state2, 0, 0.0)
            next_state_, reward_, done_, _ = test_env.step([action1, action2])
            next_state1, next_state2 = next_state_
            state1 = next_state1
            state2 = next_state2
            ep_len += 1
            # stop on task completion or after 1000 steps
            if done_[0] or ep_len >= 1000:
                ep_log.append(ep_len)
                break
    return np.mean(ep_log)
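
A minimal usage sketch for the helper above (assumption: agent1 and agent2 are agent objects exposing the choose_action/load_model methods used in the function, and TEST_NUM, np, time and CMOTP are already imported in this module):

mean_len = test(agent1, agent2, render=True, load_model=True)
print('mean test episode length over {} episodes: {}'.format(TEST_NUM, mean_len))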
Example 2
            # convert the collected episode buffers for the second agent into NumPy arrays
            sts_2 = np.asarray(sts_2, dtype=np.float32).reshape(train_input_shape)
            acts_2 = np.asarray(acts_2, dtype=np.int32).flatten()
            rwds_2 = np.asarray(rwds_2, dtype=np.float32).flatten()
            n_sts_2 = np.asarray(n_sts_2, dtype=np.float32).reshape(train_input_shape)
            dns_2 = np.asarray(dns_2, dtype=bool).flatten()
            ln_2 = np.asarray(ln_2, dtype=np.float32).flatten()

            # train
            agent1.train_without_replaybuffer(sts_1, acts_1, rwds_1, ln_1)
            agent2.train_without_replaybuffer(sts_2, acts_2, rwds_2, ln_2)

    else:
        # evaluation mode: build a fresh environment and load the trained models
        test_env = CMOTP()
        agent1.load_model()
        agent2.load_model()
        for i in range(TEST_NUM):
            state1, state2 = test_env.reset()
            while True:
                test_env.render()
                time.sleep(1)
                action1 = agent1.choose_action(state1, 0, 0.0)
                action2 = agent2.choose_action(state2, 0, 0.0)
                next_state, reward, done, _ = test_env.step([action1, action2])
                next_state1, next_state2 = next_state
                state1 = next_state1
                state2 = next_state2
                if done[0]:
                    break

    env.close()
Example 3
#             next_state, reward, done, _ = env.step(action_n)
#
#             len_this_episode += 1
#             state = next_state
#
#             if reward == 10.:
#                 break
#         print(i, len_this_episode)
#         len_episodes.append(len_this_episode)
#
#     print(np.mean(len_episodes))

from Env.cmotp_IL import CMOTP
import time
if __name__ == '__main__':
    env = CMOTP()
    state = env.reset()
    # overwrite the default grid with a custom layout (presumably -1 = wall/obstacle,
    # 0 = free cell, 1 and 2 = the two agents, 3 = the object to be transported)
    env.map = [[-1, -1, -1, 0, 0, 0, 0, 0, -1, -1, -1, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1],
               [-1, -1, -1, -1, 0, 0, 0, -1, -1, -1, -1, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1],
               [0, 0, 0, -1, -1, -1, -1, -1, 0, 0, 0, -1],
               [0, 0, 0, 0, 2, 3, 1, 0, 0, 0, 0, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1],
               [-1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1]]
    env.render()
    # keep the rendered window open for inspection before the script exits
    time.sleep(100)