def test_passreward(env_name='PerceptualDecisionMaking-v0', num_steps=1000,
                    verbose=False):
    """
    Check that a PassReward-wrapped environment exposes the step reward.

    The environment is built with ``gym.make``, wrapped in ``PassReward``
    and driven with randomly sampled actions. After every step the last
    element of the returned observation must equal the reward returned by
    that same step; otherwise the test fails.

    Parameters
    ----------
    env_name : str, optional
        Name of the environment to wrap, as passed to ``gym.make``.
        The default is 'PerceptualDecisionMaking-v0'.
    num_steps : int, optional
        Number of steps to run the environment for. The default is 1000.
    verbose : boolean, optional
        Whether to print the observation and reward of every step.
        The default is False.

    Returns
    -------
    None.

    """
    wrapped = PassReward(gym.make(env_name))
    wrapped.reset()
    for _ in range(num_steps):
        sampled_action = wrapped.action_space.sample()
        observation, reward, finished, _info = wrapped.step(sampled_action)
        assert observation[-1] == reward, \
            'Previous reward is not part of observation'
        if verbose:
            for item in (observation, reward, '--------'):
                print(item)
        # Start a fresh episode so the remaining steps stay valid.
        if finished:
            wrapped.reset()
def test_passreward(env_name, num_steps=10000, verbose=False, **envArgs):
    """
    Check that a PassReward-wrapped environment exposes the step reward.

    The environment is built with ``gym.make``, wrapped in ``PassReward``
    and driven with randomly sampled actions. After every step the last
    element of the returned observation is compared with the reward
    returned by that same step.

    Parameters
    ----------
    env_name : str
        Name of the environment to wrap, as passed to ``gym.make``.
    num_steps : int, optional
        Number of steps to run the environment for. The default is 10000.
    verbose : boolean, optional
        Whether to print the observation and reward of every step.
        The default is False.
    **envArgs
        Extra keyword arguments forwarded unchanged to ``gym.make``.

    Returns
    -------
    None.

    Raises
    ------
    AssertionError
        If the last observation element differs from the step reward.

    """
    env = gym.make(env_name, **envArgs)
    env = PassReward(env)
    env.reset()
    for _ in range(num_steps):
        action = env.action_space.sample()
        obs, rew, done, info = env.step(action)
        # Without this check the test can only fail if the env raises.
        # NOTE(review): assumes PassReward stores the reward as the last
        # observation element at full precision - confirm against wrapper.
        assert obs[-1] == rew, 'Previous reward is not part of observation'
        if verbose:
            print(obs)
            print(rew)
            print('--------')
        # Start a fresh episode so the remaining steps stay valid.
        if done:
            env.reset()