def test_text_envs():
    """Smoke-test that a text-rendered environment can be video-recorded."""
    text_env = scigym.make('FrozenLake-v0')
    recorder = VideoRecorder(text_env)
    try:
        text_env.reset()
        recorder.capture_frame()
        recorder.close()
    finally:
        # Always clean up the recording file, even when the test fails.
        os.remove(recorder.path)
def test_record_simple():
    """Record one frame of CartPole and check a non-trivial video file exists.

    Fix: the original did ``f = open(rec.path)`` and never closed the handle
    (a ResourceWarning / fd leak under pytest). ``os.stat`` reads the size
    without opening the file at all.
    """
    env = scigym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    # A real encoded video should be comfortably larger than 100 bytes.
    assert os.stat(rec.path).st_size > 100
#!/usr/bin/env python3
"""Renders a SciGym environment for quick inspection.

Fix: the ``env_id`` help text was missing its closing parenthesis
("(e.g. CartPole-v0").
"""
import argparse

import scigym

parser = argparse.ArgumentParser(
    description='Renders a SciGym environment for quick inspection.')
parser.add_argument(
    'env_id', type=str,
    help='the ID of the environment to be rendered (e.g. CartPole-v0)')
# Non-zero --step makes the loop take a random action each frame.
parser.add_argument('--step', type=int, default=1)
args = parser.parse_args()

env = scigym.make(args.env_id)
env.reset()

step = 0
while True:
    if args.step:
        env.step(env.action_space.sample())
    env.render()
    # Restart the episode every 10 frames so rendering never stalls on "done".
    if step % 10 == 0:
        env.reset()
    step += 1
import scigym
import numpy as np

env = scigym.make('teleportation-v0')

# This random-agent demo only supports discrete action spaces.
if isinstance(env.action_space, scigym.Discrete):
    num_actions = env.action_space.n
else:
    raise ValueError

actions = range(num_actions)
observation = env.reset()
action = np.random.choice(actions)
i = 0
done = False
print("Cantidad de acciones:", actions)
print("Obsevation 0: {}".format(observation))
print("Action 0: {}".format(action))

# BUG FIX: the original condition `done == False or i > 100` never applied
# the intended 100-step cap (and would loop forever if `done` became True
# only after step 100). Loop until the episode ends, at most 100 steps.
while not done and i < 100:
    i += 1
    # `_` holds the info dict; this env exposes the legal next actions in it.
    (observation, reward, done, _) = env.step(action)
    print("----------")
    print("Obsevation {}: {}".format(i, observation))
    print("Reward {}: {}".format(i, reward))
    print("Done {}: {}".format(i, done))
    print("Info {}: {}".format(i, _['available_actions']))
    if bool(_['available_actions']):
        action = np.random.choice(_['available_actions'])
        print("Action {}: {}".format(i, action))
    else:
        print("No more available actions")
#!/usr/bin/env python # from __future__ import print_function import sys, scigym, time # # Test yourself as a learning agent! Pass environment name as a command-line argument, for example: # # python keyboard_agent.py SpaceInvadersNoFrameskip-v4 # env = scigym.make( 'CartPole-v0' if len(sys.argv) < 2 else sys.argv[1]) #does this work? if not hasattr(env.action_space, 'n'): raise Exception('Keyboard agent only supports discrete action spaces') ACTIONS = env.action_space.n SKIP_CONTROL = 0 # Use previous control decision SKIP_CONTROL times, that's how you # can test what skip is still usable. human_agent_action = 0 human_wants_restart = False human_sets_pause = False def key_press(key, mod): global human_agent_action, human_wants_restart, human_sets_pause if key == 0xff0d: human_wants_restart = True if key == 32: human_sets_pause = not human_sets_pause a = int(key - ord('0'))
        # (This chunk starts mid-rollout loop; the function and loop headers
        # are outside the visible source.)
        (ob, reward, done, _info) = env.step(a)
        total_rew += reward
        # Render only every 3rd timestep to keep playback cheap.
        if render and t % 3 == 0:
            env.render()
        if done:
            break
    # Return the episode's total reward and the number of steps taken.
    return total_rew, t + 1


if __name__ == '__main__':
    logger.set_level(logger.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--display', action='store_true')
    parser.add_argument('target', nargs="?", default="CartPole-v0")
    args = parser.parse_args()

    env = scigym.make(args.target)
    # Seed both the env and numpy so rollouts are reproducible.
    env.seed(0)
    np.random.seed(0)
    # Cross-entropy-method hyperparameters (consumed outside this chunk).
    params = dict(n_iter=10, batch_size=25, elite_frac=0.2)
    num_steps = 200

    # You provide the directory to write to (can be an existing
    # directory, but can't contain previous monitor results. You can
    # also dump to a tempdir if you'd like: tempfile.mkdtemp().
    outdir = '/tmp/cem-agent-results'
    env = wrappers.Monitor(env, outdir, force=True)

    # Prepare snapshotting
    # ----------------------------------------
    def writefile(fname, s):
        # Write text `s` into the monitor output directory.
        # NOTE(review): truncated here — the `with` body lies outside this chunk.
        with open(path.join(outdir, fname), 'w') as fh:
        # (This chunk starts mid-__init__; the class header and earlier
        # attribute assignments are outside the visible source.)
        # One subplot per tracked data series.
        num_plots = len(self.plot_names)
        self.fig, self.ax = plt.subplots(num_plots)
        if num_plots == 1:
            # plt.subplots returns a bare Axes for a single plot; wrap it so
            # the rest of the code can always index self.ax like a list.
            self.ax = [self.ax]
        for axis, name in zip(self.ax, plot_names):
            axis.set_title(name)
        self.t = 0
        self.cur_plot = [None for _ in range(num_plots)]
        # Bounded history per series: old points fall off automatically.
        self.data = [deque(maxlen=horizon_timesteps) for _ in range(num_plots)]

    def callback(self, obs_t, obs_tp1, action, rew, done, info):
        """Record one transition's data points and redraw every subplot."""
        points = self.data_callback(obs_t, obs_tp1, action, rew, done, info)
        for point, data_series in zip(points, self.data):
            data_series.append(point)
        self.t += 1

        # Sliding x-window over the most recent horizon_timesteps steps.
        xmin, xmax = max(0, self.t - self.horizon_timesteps), self.t
        for i, plot in enumerate(self.cur_plot):
            # Replace the previous scatter artist instead of accumulating them.
            if plot is not None:
                plot.remove()
            self.cur_plot[i] = self.ax[i].scatter(range(xmin, xmax), list(self.data[i]))
            self.ax[i].set_xlim(xmin, xmax)
        # Tiny pause lets matplotlib flush the redraw without blocking play.
        plt.pause(0.000001)


if __name__ == '__main__':
    env = scigym.make("Acrobot-v1")    # does not work with classic environments
    play(env, zoom=4, fps=60)
import scigym
import numpy as np

env = scigym.make('surfacecode-decoding-v0')

# This random-agent demo only supports discrete action spaces.
if isinstance(env.action_space, scigym.Discrete):
    num_actions = env.action_space.n
else:
    raise ValueError

actions = range(num_actions)
observation = env.reset()
action = np.random.choice(actions)
done = False
while not done:
    # BUG FIX: env.step returns a 4-tuple (observation, reward, done, info);
    # the original unpacked only three names, raising ValueError on the
    # very first step.
    (observation, reward, done, info) = env.step(action)
    action = np.random.choice(actions)