def test_episode_count(self):
    """RunStats.episodes increments once per completed episode."""
    base_env = gym.make('CartPole-v1')
    env = wrappers.RunStats(gym_wrapper.GymWrapper(base_env))

    self.assertEqual(0, env.episodes)
    step = env.reset()
    # An explicit reset alone does not complete an episode.
    self.assertEqual(0, env.episodes)

    action = np.array(1, dtype=np.int32)
    for expected_episodes in range(1, 4):
        # Drive the current episode to its terminal time step.
        while not step.is_last():
            step = env.step(action)
        self.assertEqual(expected_episodes, env.episodes)
        # Stepping past the terminal step begins the next episode.
        step = env.step(action)
def test_episode_count_with_time_limit(self):
    """Episodes truncated by TimeLimit are still counted by RunStats."""
    base_env = gym.make('CartPole-v1')
    env = gym_wrapper.GymWrapper(base_env)
    env = wrappers.TimeLimit(env, 2)
    env = wrappers.RunStats(env)

    env.reset()
    self.assertEqual(0, env.episodes)

    action = np.array(0, dtype=np.int32)
    env.step(action)
    last_step = env.step(action)
    # The two-step limit forces termination on the second step.
    self.assertTrue(last_step.is_last())
    self.assertEqual(1, env.episodes)
def test_resets_count(self):
    """RunStats.resets counts the explicit reset and every auto-reset."""
    base_env = gym.make('CartPole-v1')
    env = wrappers.RunStats(gym_wrapper.GymWrapper(base_env))

    self.assertEqual(0, env.resets)
    step = env.reset()
    self.assertEqual(1, env.resets)

    action = np.array(1, dtype=np.int32)
    expected_resets = 1
    for _ in range(4):
        # The reset count stays constant while an episode is in progress.
        while not step.is_last():
            self.assertEqual(expected_resets, env.resets)
            step = env.step(action)
        # Stepping past a terminal time step auto-resets the environment.
        step = env.step(action)
        expected_resets += 1
def test_step_count(self):
    """RunStats.total_steps counts each in-episode step; resets do not count.

    Fixes a copy-paste defect: the two setup assertions checked
    ``env.episodes`` (copied from ``test_episode_count``) instead of
    ``env.total_steps``, so this test never verified that the step counter
    starts at zero and that ``reset()`` leaves it unchanged.
    """
    cartpole_env = gym.make('CartPole-v1')
    env = gym_wrapper.GymWrapper(cartpole_env)
    env = wrappers.RunStats(env)

    self.assertEqual(0, env.total_steps)
    time_step = env.reset()
    # Resetting the environment must not be counted as a step.
    self.assertEqual(0, env.total_steps)

    steps = 0
    for _ in range(0, 4):
        while not time_step.is_last():
            self.assertEqual(steps, env.total_steps)
            time_step = env.step(np.array(1, dtype=np.int32))
            steps += 1
        # Stepping past the terminal time step routes through an auto-reset
        # (see test_resets_count), so `steps` is intentionally not bumped.
        time_step = env.step(np.array(1, dtype=np.int32))
from dual_goal_maze_env import DualGoalMaze
from tf_agents.environments import utils
from tf_agents.environments import wrappers
import tensorflow as tf
import numpy as np
from tf_agents.environments import py_environment
from tf_agents.environments import tf_environment
from tf_agents.environments import tf_py_environment
from tf_agents.specs import array_spec
from tf_agents.environments import suite_gym
from tf_agents.trajectories import time_step as ts

# Wrap the custom maze environment so run statistics (episodes, resets,
# total_steps) are accumulated while it is exercised below.
environment = DualGoalMaze()
stats_env = wrappers.RunStats(environment)
# Sanity-check that the environment honors its own specs over 5 episodes.
utils.validate_py_environment(stats_env, episodes=5)
time_step = stats_env.reset()
rewards = []
steps = []
num_episodes = 5
for _ in range(num_episodes):
    episode_reward = 0
    episode_steps = 0
    while not time_step.is_last():
        # Uniformly random action; assumes the action space is the 4
        # discrete values 0..3 — TODO confirm against DualGoalMaze's spec.
        action = np.random.randint(0, 4)
        time_step = stats_env.step(action)
        episode_steps += 1
        episode_reward += time_step.reward
    # NOTE(review): within the visible chunk, `rewards`/`steps` are never
    # appended and the environment is not reset between episodes, so
    # iterations after the first would see is_last() still True and do
    # nothing. The append/reset statements may continue past this chunk —
    # verify the loop body is complete in the full file.