def train():
    """Train a Basic_DQN agent: build the config and env, seed, then learn.

    Side effects: enables eager execution globally and runs the full
    training loop via ``agent.learn()``.  Returns nothing.
    """
    # NOTE(review): a second ``def train()`` later in this module shadows
    # this definition — confirm which variant is the intended entry point.
    # Eager mode makes tf.function-decorated code debuggable step by step.
    tf.config.run_functions_eagerly(True)
    config = Basic_DQN_Conf()
    set_global_seeds(config.seed)
    # Build the training environment and propagate its action/observation
    # spaces into the config before constructing the agent.
    env = init_env(config, 'train')
    config.num_actions = env.action_space.n
    config.obs_shape = env.observation_space.shape
    agent = init_agent(config, env)
    agent.learn()
def train():
    """Entry point for the FP-RNN DQN variant: configure, seed, and learn."""
    # Force eager execution of tf.function bodies (easier debugging).
    tf.config.run_functions_eagerly(True)
    cfg = Basic_DQN_FP_RNN_2_Conf()
    set_global_seeds(cfg.seed)
    # Create the training environment, then copy its action count and
    # observation shape onto the config so the agent can be built from it.
    environment = init_env(cfg, 'train')
    cfg.num_actions = environment.action_space.n
    cfg.obs_shape = environment.observation_space.shape
    init_agent(cfg, environment).learn()
def play_test_games(self):
    """Run ``self.config.num_tests`` deterministic evaluation episodes.

    Uses a freshly created test environment, acts greedily
    (``stochastic=False``), prints each episode's final reward list and
    the overall mean, then closes the environment.  Returns nothing.
    """
    n_episodes = self.config.num_tests
    eval_env = init_env(self.config, mode='test')
    episode_means = np.zeros(n_episodes)
    for episode in range(n_episodes):
        obs = eval_env.reset()
        done = False
        rew_list = None
        while not done:
            # Greedy action selection on the current observation batch.
            action_list = self.choose_action(tf.constant(obs), stochastic=False)
            obs, rew_list, done, _ = eval_env.step(action_list)
        # ``done`` terminated the loop, so ``rew_list`` holds the final
        # per-env rewards of this episode.
        print(f'test_reward_dict for test {episode} is {rew_list}')
        episode_means[episode] = np.mean(rew_list)
    print(f'mean reward of {n_episodes} tests is {np.mean(episode_means)}')
    eval_env.close()