Example #1
import tensorflow as tf

# Project-local helpers; the module paths below are assumed and may differ.
from configs import Basic_DQN_Conf
from utils import set_global_seeds, init_env, init_agent


def train():
    # Run tf.functions eagerly (easier to debug, slower than graph mode).
    tf.config.run_functions_eagerly(True)
    config = Basic_DQN_Conf()
    set_global_seeds(config.seed)

    # init env
    env = init_env(config, 'train')

    # Expose the environment's action and observation dimensions to the agent.
    config.num_actions = env.action_space.n
    config.obs_shape = env.observation_space.shape

    agent = init_agent(config, env)
    agent.learn()


# Ad-hoc debugging snippet (kept commented out in the source): inspect the
# observations the environment produces before training.
#
# tf.config.run_functions_eagerly(True)
# config = Basic_DQN_Conf()
# env = init_env(config, 'train')
#
# print(f'env.observation_space.shape {env.observation_space.shape}')
#
# obs = env.reset()
# print(obs.shape)
# print(obs.dtype)
#
# import matplotlib.pyplot as plt
# plt.imshow(obs[1, :, :, 0])
# plt.show()
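
All of the examples depend on a project-specific init_env(config, mode) factory whose body is not shown here. As a rough illustration only, a minimal sketch of such a factory, assuming an older Gym-style API and a hypothetical config.env_id attribute, might look like:

import gym

def init_env(config, mode):
    # Hypothetical sketch: build the environment named in the config and
    # seed it; 'train' vs 'test' mode could select different wrappers.
    env = gym.make(config.env_id)  # config.env_id is an assumption
    env.seed(config.seed)
    return env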
Example #2
import tensorflow as tf

# Project-local helpers; the module paths below are assumed and may differ.
from configs import Basic_DQN_FP_RNN_2_Conf
from utils import set_global_seeds, init_env, init_agent


def train():
    # Identical to Example #1 except for the config class used.
    tf.config.run_functions_eagerly(True)
    config = Basic_DQN_FP_RNN_2_Conf()
    set_global_seeds(config.seed)

    # init env
    env = init_env(config, 'train')

    config.num_actions = env.action_space.n
    config.obs_shape = env.observation_space.shape

    agent = init_agent(config, env)
    agent.learn()
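
Examples #1 and #2 are identical apart from the config class they instantiate. One way to avoid maintaining a near-duplicate train() per config is to parameterize it; the registry below is an assumption for illustration, not part of the original project:

# Hypothetical registry mapping names to the project's config classes.
CONFIGS = {
    'dqn': Basic_DQN_Conf,
    'dqn_fp_rnn_2': Basic_DQN_FP_RNN_2_Conf,
}

def train_with(name):
    config = CONFIGS[name]()
    set_global_seeds(config.seed)
    env = init_env(config, 'train')
    config.num_actions = env.action_space.n
    config.obs_shape = env.observation_space.shape
    init_agent(config, env).learn()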
Example #3
    def play_test_games(self):
        # Assumes module-level `import numpy as np` and `import tensorflow as tf`.
        # Runs `num_tests` evaluation episodes with a greedy (non-stochastic) policy.
        num_tests = self.config.num_tests

        test_env = init_env(self.config, mode='test')

        test_rewards = np.zeros(num_tests)
        for i in range(num_tests):
            test_done = False
            test_obs_all = test_env.reset()
            while not test_done:
                test_obs_all = tf.constant(test_obs_all)
                # stochastic=False disables exploration: act greedily.
                test_action_list = self.choose_action(test_obs_all, stochastic=False)
                test_obs_all, test_rew_list, test_done, _ = test_env.step(test_action_list)

                if test_done:
                    print(f'test rewards for test {i}: {test_rew_list}')
                    # Average over the (possibly vectorized) environment's rewards.
                    test_rewards[i] = np.mean(test_rew_list)

        print(f'mean reward of {num_tests} tests is {np.mean(test_rewards)}')
        test_env.close()
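
Putting the examples together, a typical session trains an agent and then evaluates it with play_test_games. The entry point below is a sketch under the assumption that init_agent returns an agent exposing both learn() and play_test_games():

def main():
    config = Basic_DQN_Conf()
    set_global_seeds(config.seed)
    env = init_env(config, 'train')
    config.num_actions = env.action_space.n
    config.obs_shape = env.observation_space.shape
    agent = init_agent(config, env)
    agent.learn()            # train to completion
    agent.play_test_games()  # then evaluate with the greedy policy

if __name__ == '__main__':
    main()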