Example #1
import os

import scigym
from scigym.wrappers.monitoring.video_recorder import VideoRecorder  # import path assumed to mirror gym's


def test_text_envs():
    env = scigym.make('FrozenLake-v0')
    video = VideoRecorder(env)
    try:
        env.reset()
        video.capture_frame()
        video.close()
    finally:
        os.remove(video.path)
Example #2
# Requires the same imports as Example #1 (os, scigym, VideoRecorder).
def test_record_simple():
    env = scigym.make("CartPole-v1")
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()
    rec.close()
    assert not rec.empty
    assert not rec.broken
    assert os.path.exists(rec.path)
    # A non-trivial file size indicates that frames were actually encoded.
    with open(rec.path) as f:
        assert os.fstat(f.fileno()).st_size > 100
Example #3
#!/usr/bin/env python3
import argparse
import scigym

parser = argparse.ArgumentParser(
    description='Renders a SciGym environment for quick inspection.')
parser.add_argument(
    'env_id',
    type=str,
    help='the ID of the environment to be rendered (e.g. CartPole-v0)')
parser.add_argument('--step', type=int, default=1)
args = parser.parse_args()

env = scigym.make(args.env_id)
env.reset()

step = 0
while True:
    if args.step:
        env.step(env.action_space.sample())  # take a random action each frame
    env.render()
    if step % 10 == 0:  # reset periodically so rendering continues past episode end
        env.reset()
    step += 1
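Assuming the script above is saved as render_env.py (the filename is hypothetical), it can be run against any registered environment, e.g.:

python render_env.py CartPole-v0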
Example #4
import scigym
import numpy as np

env = scigym.make('teleportation-v0')
if isinstance(env.action_space, scigym.Discrete):
    num_actions = env.action_space.n
else:
    raise ValueError("Expected a discrete action space")

actions = range(num_actions)
observation = env.reset()
action = np.random.choice(actions)
i = 0
done = False
print("Cantidad de acciones:", actions)
print("Obsevation 0: {}".format(observation))
print("Action 0: {}".format(action))

# Step until the episode ends or 100 steps have elapsed.
while not done and i < 100:
    i += 1
    (observation, reward, done, info) = env.step(action)
    print("----------")
    print("Observation {}: {}".format(i, observation))
    print("Reward {}: {}".format(i, reward))
    print("Done {}: {}".format(i, done))
    print("Info {}: {}".format(i, info['available_actions']))
    if info['available_actions']:
        action = np.random.choice(info['available_actions'])
        print("Action {}: {}".format(i, action))
    else:
        print("No more available actions")
Example #5
#!/usr/bin/env python
#
from __future__ import print_function

import sys, scigym, time

#
# Test yourself as a learning agent! Pass environment name as a command-line argument, for example:
#
# python keyboard_agent.py SpaceInvadersNoFrameskip-v4
#

env = scigym.make(
    'CartPole-v0' if len(sys.argv) < 2 else sys.argv[1])  #does this work?

if not hasattr(env.action_space, 'n'):
    raise Exception('Keyboard agent only supports discrete action spaces')
ACTIONS = env.action_space.n
SKIP_CONTROL = 0  # Use the previous control decision SKIP_CONTROL more times;
                  # lets you test how much frame skip is still playable.

human_agent_action = 0
human_wants_restart = False
human_sets_pause = False


def key_press(key, mod):
    global human_agent_action, human_wants_restart, human_sets_pause
    if key == 0xff0d: human_wants_restart = True   # Enter restarts the episode
    if key == 32: human_sets_pause = not human_sets_pause   # Space toggles pause
    a = int(key - ord('0'))                        # digit keys select actions
    if a <= 0 or a >= ACTIONS: return
    human_agent_action = a
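The snippet is cut off at this point. In the Gym keyboard_agent.py example this code follows, the matching release handler and the window hookup look like the sketch below; the viewer attributes are assumptions carried over from gym, since scigym's render window is not shown here.

def key_release(key, mod):
    global human_agent_action
    a = int(key - ord('0'))
    if a <= 0 or a >= ACTIONS: return
    if human_agent_action == a:
        human_agent_action = 0

env.render()
env.unwrapped.viewer.window.on_key_press = key_press
env.unwrapped.viewer.window.on_key_release = key_release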
Example #6
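This fragment begins mid-rollout and omits its imports; a minimal sketch of what it appears to rely on, with module paths assumed to mirror the Gym cross-entropy-method example it resembles:

import argparse
from os import path

import numpy as np
import scigym
from scigym import logger, wrappers  # assumed to mirror `from gym import logger, wrappers`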
        (ob, reward, done, _info) = env.step(a)
        total_rew += reward
        if render and t % 3 == 0: env.render()
        if done: break
    return total_rew, t + 1


if __name__ == '__main__':
    logger.set_level(logger.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--display', action='store_true')
    parser.add_argument('target', nargs="?", default="CartPole-v0")
    args = parser.parse_args()

    env = scigym.make(args.target)
    env.seed(0)
    np.random.seed(0)
    params = dict(n_iter=10, batch_size=25, elite_frac=0.2)
    num_steps = 200

    # You provide the directory to write to (it can be an existing
    # directory, but it can't contain previous monitor results). You can
    # also dump to a tempdir if you'd like: tempfile.mkdtemp().
    outdir = '/tmp/cem-agent-results'
    env = wrappers.Monitor(env, outdir, force=True)

    # Prepare snapshotting
    # ----------------------------------------
    def writefile(fname, s):
        with open(path.join(outdir, fname), 'w') as fh:
            fh.write(s)
Example #7
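The class fragment below also arrives without its imports or constructor; a minimal sketch of the dependencies it uses, with the play utility path assumed to mirror gym.utils.play:

from collections import deque

import matplotlib.pyplot as plt
import scigym
from scigym.utils.play import play  # assumed path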
        num_plots = len(self.plot_names)
        self.fig, self.ax = plt.subplots(num_plots)
        if num_plots == 1:
            # plt.subplots returns a bare Axes object for a single plot; normalize to a list.
            self.ax = [self.ax]
        for axis, name in zip(self.ax, self.plot_names):
            axis.set_title(name)
        self.t = 0
        self.cur_plot = [None for _ in range(num_plots)]
        self.data = [deque(maxlen=horizon_timesteps) for _ in range(num_plots)]

    def callback(self, obs_t, obs_tp1, action, rew, done, info):
        points = self.data_callback(obs_t, obs_tp1, action, rew, done, info)
        for point, data_series in zip(points, self.data):
            data_series.append(point)
        self.t += 1

        xmin, xmax = max(0, self.t - self.horizon_timesteps), self.t

        for i, plot in enumerate(self.cur_plot):
            if plot is not None:
                plot.remove()
            self.cur_plot[i] = self.ax[i].scatter(range(xmin, xmax),
                                                  list(self.data[i]))
            self.ax[i].set_xlim(xmin, xmax)
        plt.pause(0.000001)


if __name__ == '__main__':
    env = scigym.make("Acrobot-v1")  # does not work with classic environments
    play(env, zoom=4, fps=60)
Example #8
import scigym
import numpy as np

env = scigym.make('surfacecode-decoding-v0')

if isinstance(env.action_space, scigym.Discrete):
    num_actions = env.action_space.n
else:
    raise ValueError("Expected a discrete action space")

actions = range(num_actions)
observation = env.reset()
action = np.random.choice(actions)

done = False
while not done:
    # env.step returns a 4-tuple: (observation, reward, done, info)
    (observation, reward, done, info) = env.step(action)
    action = np.random.choice(actions)