def test_semisuper_succeeds():
    """Regression test: a semisupervised env can be monitored and stepped."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('SemisuperPendulumDecay-v0'), temp)
        env.reset()
        env.step(env.action_space.sample())
        env.close()
def test_video_callable_false_does_not_record():
    """With video_callable=False the monitor must write no videos at all."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('CartPole-v0'), temp, video_callable=False)
        env.reset()
        env.close()
        assert len(monitoring.load_results(temp)['videos']) == 0
def test_configured():
    """configure() should mark every layer of wrapping as configured."""
    env = gym.make("FrozenLake-v0")
    env.configure()

    # Make sure all layers of wrapping are configured
    assert env._configured
    assert env.env._configured

    env.close()
def test_monitor_filename():
    """Closing a Monitor should leave exactly one manifest file on disk."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('CartPole-v0'), directory=temp)
        env.close()
        manifests = glob.glob(os.path.join(temp, '*.manifest.*'))
        assert len(manifests) == 1
def test_video_callable_records_videos():
    """The default video_callable records the very first episode."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('CartPole-v0'), temp)
        env.reset()
        env.close()
        videos = monitoring.load_results(temp)['videos']
        assert len(videos) == 1, "Videos: {}".format(videos)
def test_text_envs():
    """A text-rendered env can still have a frame captured by VideoRecorder."""
    env = gym.make('FrozenLake-v0')
    video = VideoRecorder(env)
    try:
        env.reset()
        video.capture_frame()
        video.close()
    finally:
        # Always clean up the recording file, even if capture failed.
        os.remove(video.path)
def test_video_callable_true_not_allowed():
    """Passing video_callable=True is invalid and must raise error.Error."""
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        raised = False
        try:
            env = Monitor(env, temp, video_callable=True)
        except error.Error:
            raised = True
        assert raised
def test_semisuper_true_rewards():
    """The noisy env's perceived reward differs from info['true_reward']."""
    env = gym.make('SemisuperPendulumNoise-v0')
    env.reset()

    _, perceived_reward, _, info = env.step(env.action_space.sample())

    # The noise in the reward should ensure these are different. If we get
    # spurious errors, we can remove this check
    assert perceived_reward != info['true_reward']
def test_record_simple():
    """Record one frame and check a non-trivial, non-broken file was written.

    Fix: the original did ``f = open(rec.path)`` purely to read the file size
    and never closed the handle (a resource leak, and a text-mode open of a
    binary video file). ``os.stat`` reads the size without opening the file.
    """
    env = gym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    # A real (if tiny) encoded video should comfortably exceed 100 bytes.
    assert os.stat(rec.path).st_size > 100
def test_no_double_wrapping():
    """Monitoring an already-monitored env must raise DoubleWrapperError."""
    temp = tempfile.mkdtemp()
    try:
        env = wrappers.Monitor(gym.make("FrozenLake-v0"), temp)
        raised = False
        try:
            env = wrappers.Monitor(env, temp)
        except error.DoubleWrapperError:
            raised = True
        assert raised, "Should not allow double wrapping"
        env.close()
    finally:
        shutil.rmtree(temp)
def test_write_upon_reset_false():
    """With write_upon_reset=False, nothing hits disk until close()."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('CartPole-v0'), directory=temp,
                      video_callable=False, write_upon_reset=False)
        env.reset()

        # Nothing should have been written yet.
        files = glob.glob(os.path.join(temp, '*'))
        assert not files, "Files: {}".format(files)

        # close() flushes results to disk.
        env.close()
        assert len(glob.glob(os.path.join(temp, '*'))) > 0
def test_steps_limit_restart():
    """A step-limited env reports done at the limit and begins a new episode.

    Fix: replaced ``assert done == False`` / ``assert done == True`` boolean
    literal comparisons (PEP 8 E712) with plain truthiness assertions.
    """
    with helpers.tempdir() as temp:
        env = gym.make('test.StepsLimitCartpole-v0')
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # Episode has started
        _, _, done, info = env.step(env.action_space.sample())
        assert not done

        # Limit reached, now we get a done signal and the env resets itself
        _, _, done, info = env.step(env.action_space.sample())
        assert done
        # The monitor should have rolled over to the next episode.
        assert env.episode_id == 1

        env.close()
def test():
    """End-to-end benchmark scoring over monitored CartPole rollouts."""
    tasks = [
        {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 5},
        {'env_id': 'CartPole-v0', 'trials': 1, 'max_timesteps': 100},
    ]
    benchmark = registration.Benchmark(
        id='MyBenchmark-v0',
        scorer=scoring.ClipTo01ThenAverage(),
        tasks=tasks)

    with helpers.tempdir() as temp:
        env = wrappers.Monitor(gym.make('CartPole-v0'), directory=temp,
                               video_callable=False)
        env.seed(0)

        # One evaluation episode, two training episodes, then a "good"
        # evaluation episode.
        env.set_monitor_mode('evaluation')
        rollout(env)

        env.set_monitor_mode('training')
        for _ in range(2):
            rollout(env)

        env.set_monitor_mode('evaluation')
        rollout(env, good=True)

        env.close()
        results = monitoring.load_results(temp)
        evaluation_score = benchmark.score_evaluation(
            'CartPole-v0', results['data_sources'],
            results['initial_reset_timestamps'], results['episode_lengths'],
            results['episode_rewards'], results['episode_types'],
            results['timestamps'])
        benchmark_score = benchmark.score_benchmark({
            'CartPole-v0': evaluation_score['scores'],
        })

        expected = [0.00089999999999999998, 0.0054000000000000003]
        assert np.all(np.isclose(evaluation_score['scores'], expected)), \
            "evaluation_score={}".format(evaluation_score)
        assert np.isclose(benchmark_score, 0.00315), \
            "benchmark_score={}".format(benchmark_score)
def test_env_reuse():
    """An autoresetting env alternates done=False/True across paired steps."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('Autoreset-v0'), temp)
        env.reset()

        # Two full two-step episodes: done fires on every second step.
        for expect_done in (False, True, False, True):
            _, _, done, _ = env.step(None)
            if expect_done:
                assert done
            else:
                assert not done

        env.close()
def test_only_complete_episodes_written():
    """An episode interrupted by close() must not be written to disk."""
    with helpers.tempdir() as temp:
        env = Monitor(gym.make('CartPole-v0'), temp, video_callable=False)

        # Run one episode to completion...
        env.reset()
        done = False
        while not done:
            _, _, done, _ = env.step(env.action_space.sample())

        # ...then start, but do not finish, a second one.
        env.reset()
        env.step(env.action_space.sample())
        env.close()

        # Only 1 episode should be written
        results = monitoring.load_results(temp)
        n_written = len(results['episode_lengths'])
        assert n_written == 1, \
            "Found {} episodes written; expecting 1".format(n_written)
def test_write_upon_reset_true():
    """With write_upon_reset=True, results are on disk right after reset()."""
    with helpers.tempdir() as temp:
        env = gym.make('CartPole-v0')
        # TODO: Fix Cartpole to not configure itself automatically
        # assert not env._configured
        env = Monitor(env, directory=temp, video_callable=False,
                      write_upon_reset=True)
        env.configure()
        env.reset()

        # reset() should already have flushed something to disk.
        files = glob.glob(os.path.join(temp, '*'))
        assert len(files) > 0, "Files: {}".format(files)

        env.close()
        assert len(glob.glob(os.path.join(temp, '*'))) > 0
def test_no_monitor_reset_unless_done():
    """The monitor forbids reset() mid-episode (but allows it when done/fresh)."""

    def assert_reset_raises(env):
        # A mid-episode reset must raise error.Error.
        try:
            env.reset()
        except error.Error:
            return
        assert False, "Env allowed a reset when it shouldn't have"

    with helpers.tempdir() as temp:
        # Make sure we can reset as we please without monitor
        env = gym.make('CartPole-v0')
        env.reset()
        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        env.reset()

        # can reset once as soon as we start
        env = Monitor(env, temp, video_callable=False)
        env.reset()

        # can reset multiple times in a row
        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        env.step(env.action_space.sample())
        assert_reset_raises(env)

        # should allow resets after the episode is done
        done = False
        while not done:
            _, _, done, _ = env.step(env.action_space.sample())
        env.reset()
        env.reset()

        env.step(env.action_space.sample())
        assert_reset_raises(env)

        env.close()
def test_skip():
    """SkipWrapper(2) still yields a resettable, renderable env."""
    skip_two = SkipWrapper(2)
    env = skip_two(gym.make("FrozenLake-v0"))
    obs = env.reset()
    env.render()
def callback(self, obs_t, obs_tp1, action, rew, done, info):
    """Record the latest transition's data points and redraw every plot.

    ``self.data_callback`` maps the transition to one value per tracked
    series; each value is appended to its deque/list in ``self.data`` and
    the corresponding scatter plot is rebuilt over the visible window.
    """
    new_points = self.data_callback(obs_t, obs_tp1, action, rew, done, info)
    for value, series in zip(new_points, self.data):
        series.append(value)
    self.t += 1

    # Show at most the trailing horizon_timesteps worth of data.
    xmin, xmax = max(0, self.t - self.horizon_timesteps), self.t
    for i, plot in enumerate(self.cur_plot):
        if plot is not None:
            plot.remove()
        self.cur_plot[i] = self.ax[i].scatter(range(xmin, xmax),
                                              list(self.data[i]))
        self.ax[i].set_xlim(xmin, xmax)
    plt.pause(0.000001)


if __name__ == '__main__':
    from rl_algs.common.atari_wrappers import wrap_deepmind

    def callback(obs_t, obs_tp1, action, rew, done, info):
        # Track per-step reward and the mean pixel intensity of the frame.
        return [rew, obs_t.mean()]

    env_plotter = EnvPlotter(callback, 30 * 5, ["reward", "mean intensity"])

    env = wrap_deepmind(gym.make("MontezumaRevengeNoFrameskip-v3"))
    play_env(env, zoom=4, callback=env_plotter.callback, fps=30)