def test(ag1, ag2, render=False, load_model=False):
    """Run TEST_NUM greedy evaluation episodes and return the mean episode length."""
    test_env = CMOTP()
    if load_model:
        ag1.load_model()
        ag2.load_model()
    ep_log = []
    for iii in range(TEST_NUM):
        print('test {}'.format(iii))
        state1, state2 = test_env.reset()
        ep_len = 0
        while True:
            if render:
                test_env.render()
                time.sleep(1)
            # epsilon = 0.0: purely greedy action selection during evaluation
            action1 = ag1.choose_action(state1, 0, 0.0)
            action2 = ag2.choose_action(state2, 0, 0.0)
            next_state_, reward_, done_, _ = test_env.step([action1, action2])
            next_state1, next_state2 = next_state_
            state1 = next_state1
            state2 = next_state2
            ep_len += 1
            # stop when the episode terminates or after 1000 steps
            if done_[0] or ep_len >= 1000:
                ep_log.append(ep_len)
                break
    return np.mean(ep_log)
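# A minimal usage sketch for test() (hypothetical driver, not part of the original
# script): it assumes agent objects that expose load_model() and
# choose_action(state, step, epsilon), with epsilon = 0.0 meaning greedy, as used above.
#
# if __name__ == '__main__':
#     agent1, agent2 = make_agent(), make_agent()   # make_agent() is a hypothetical factory
#     mean_len = test(agent1, agent2, render=False, load_model=True)
#     print('mean episode length over {} test episodes: {:.1f}'.format(TEST_NUM, mean_len))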
        # convert agent 2's episode buffers to flat numpy arrays before training
        sts_2 = np.asarray(sts_2, dtype=np.float32).reshape(train_input_shape)
        acts_2 = np.asarray(acts_2, dtype=np.int32).flatten()
        rwds_2 = np.asarray(rwds_2, dtype=np.float32).flatten()
        n_sts_2 = np.asarray(n_sts_2, dtype=np.float32).reshape(train_input_shape)
        dns_2 = np.asarray(dns_2, dtype=bool).flatten()  # np.bool is removed in recent NumPy; use the builtin bool
        ln_2 = np.asarray(ln_2, dtype=np.float32).flatten()

        # train both agents on the episode they just collected
        agent1.train_without_replaybuffer(sts_1, acts_1, rwds_1, ln_1)
        agent2.train_without_replaybuffer(sts_2, acts_2, rwds_2, ln_2)
else:
    # evaluation mode: load the saved models and render greedy rollouts
    test_env = CMOTP()
    agent1.load_model()
    agent2.load_model()
    for i in range(TEST_NUM):
        state1, state2 = test_env.reset()
        while True:
            test_env.render()
            time.sleep(1)
            action1 = agent1.choose_action(state1, 0, 0.0)
            action2 = agent2.choose_action(state2, 0, 0.0)
            next_state, reward, done, _ = test_env.step([action1, action2])
            next_state1, next_state2 = next_state
            state1 = next_state1
            state2 = next_state2
            if done[0]:
                break

env.close()
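# Note: the `else:` branch above pairs with a train/evaluate switch that is not part
# of this excerpt. A minimal sketch of how it is typically wired (the TRAIN flag and
# episode loop below are assumptions, not the original code):
#
# TRAIN = True
# if TRAIN:
#     for episode in range(MAX_EPISODES):
#         ...  # collect one episode, build sts_*/acts_*/rwds_*/ln_*, then train as above
# else:
#     ...  # load the saved models and render greedy rollouts as above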
#             next_state, reward, done, _ = env.step(action_n)
#
#             len_this_episode += 1
#             state = next_state
#
#             if reward == 10.:
#                 break
#         print(i, len_this_episode)
#         len_episodes.append(len_this_episode)
#
#     print(np.mean(len_episodes))


from Env.cmotp_IL import CMOTP
import time

if __name__ == '__main__':
    # quick visual check of a hand-written CMOTP grid layout
    env = CMOTP()
    state = env.reset()
    # overwrite the default grid with a custom layout before rendering it
    env.map = [[-1, -1, -1, 0, 0, 0, 0, 0, -1, -1, -1, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1],
               [-1, -1, -1, -1, 0, 0, 0, -1, -1, -1, -1, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1],
               [0, 0, 0, -1, -1, -1, -1, -1, 0, 0, 0, -1],
               [0, 0, 0, 0, 2, 3, 1, 0, 0, 0, 0, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1],
               [-1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1],
               [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1]]
    env.render()
    # keep the render window open long enough to inspect the layout
    time.sleep(100)