Code Example #1
def test_make_deprecated():
    try:
        envs.make('Humanoid-v0')
    except error.Error:
        pass
    else:
        assert False
Code Example #2
def verify_environments_match(old_environment_id,
                              new_environment_id,
                              seed=1,
                              num_actions=1000):
    old_environment = envs.make(old_environment_id)
    new_environment = envs.make(new_environment_id)

    old_environment.seed(seed)
    new_environment.seed(seed)

    old_reset_observation = old_environment.reset()
    new_reset_observation = new_environment.reset()

    np.testing.assert_allclose(old_reset_observation, new_reset_observation)

    for i in range(num_actions):
        action = old_environment.action_space.sample()
        old_observation, old_reward, old_done, old_info = old_environment.step(
            action)
        new_observation, new_reward, new_done, new_info = new_environment.step(
            action)

        eps = 1e-6
        np.testing.assert_allclose(old_observation, new_observation, atol=eps)
        np.testing.assert_allclose(old_reward, new_reward, atol=eps)
        np.testing.assert_allclose(old_done, new_done, atol=eps)

        for key in old_info:
            np.testing.assert_allclose(old_info[key], new_info[key], atol=eps)
Code Example #3
def verify_environments_match(old_environment_id,
                              new_environment_id,
                              seed=1,
                              num_actions=1000):
    old_environment = envs.make(old_environment_id)
    new_environment = envs.make(new_environment_id)

    old_environment.seed(seed)
    new_environment.seed(seed)

    old_reset_observation = old_environment.reset()
    new_reset_observation = new_environment.reset()

    np.testing.assert_allclose(old_reset_observation, new_reset_observation)

    for i in range(num_actions):
        action = old_environment.action_space.sample()
        old_observation, old_reward, old_done, old_info = old_environment.step(
            action)
        new_observation, new_reward, new_done, new_info = new_environment.step(
            action)

        eps = 1e-6
        np.testing.assert_allclose(old_observation, new_observation, atol=eps)
        np.testing.assert_allclose(old_reward, new_reward, atol=eps)
        np.testing.assert_allclose(old_done, new_done, atol=eps)

        for key in old_info:
            np.testing.assert_allclose(old_info[key], new_info[key], atol=eps)
Code Example #4
def test_make_deprecated():
    try:
        envs.make("Humanoid-v0")
    except error.Error:
        pass
    else:
        assert False
Code Example #5
File: test_envs.py Project: arboo/gym
def test_random_rollout():
    for env in [envs.make('CartPole-v0'), envs.make('FrozenLake-v0')]:
        agent = lambda ob: env.action_space.sample()
        ob = env.reset()
        for _ in range(10):
            assert env.observation_space.contains(ob)
            a = agent(ob)
            assert env.action_space.contains(a)
            (ob, _reward, done, _info) = env.step(a)
            if done: break
Code Example #6
def test_random_rollout():
    for env in [envs.make('CartPole-v0'), envs.make('FrozenLake-v0')]:
        agent = lambda ob: env.action_space.sample()
        ob = env.reset()
        for _ in range(10):
            assert env.observation_space.contains(ob)
            a = agent(ob)
            assert env.action_space.contains(a)
            (ob, _reward, done, _info) = env.step(a)
            if done: break
Code Example #7
File: test_envs.py Project: thepinkturtle/gym
def test_env_render_result_is_immutable():
    environs = [
        envs.make("Taxi-v3"),
        envs.make("FrozenLake-v1"),
    ]

    for env in environs:
        env.reset()
        output = env.render(mode="ansi")
        assert isinstance(output, str)
        env.close()
Code Example #8
def test_env_render_result_is_immutable():
    environs = [
        envs.make('Taxi-v3'),
        envs.make('FrozenLake-v0'),
        envs.make('Reverse-v0'),
    ]

    for env in environs:
        env.reset()
        output = env.render(mode='ansi')
        assert isinstance(output, str)
        env.close()
Code Example #9
File: test_envs.py Project: jiapei100/gym
def test_env_render_result_is_immutable():
    from six import string_types
    environs = [
        envs.make('Taxi-v2'),
        envs.make('FrozenLake-v0'),
        envs.make('Reverse-v0'),
    ]

    for env in environs:
        env.reset()
        output = env.render(mode='ansi')
        assert isinstance(output, string_types)
        env.close()
Code Example #10
def test_make_with_kwargs():
    env = envs.make("test.ArgumentEnv-v0", arg2="override_arg2", arg3="override_arg3")
    assert env.spec.id == "test.ArgumentEnv-v0"
    assert isinstance(env.unwrapped, ArgumentEnv)
    assert env.arg1 == "arg1"
    assert env.arg2 == "override_arg2"
    assert env.arg3 == "override_arg3"
Code Example #11
def main():
    # Initialize environment
    env = envs.make('minecraft-v0')
    env.reset()

    # Get action space
    #action_space = env.action_space

    # Plan some random stuff to do
    actions = [  # agent_id, action_num
        (1, 0),
        (2, 4),
        (2, 3),
        (2, 0),
        (1, 3),
        (2, 2),
        (2, 5),  # gets reward of 1
    ]

    # Do some random stuff to the env for 2 times
    for i in range(2):
        for a in actions:
            state1, reward, done, info = env.step(a)

            if i < 1:
                # output results
                print('reward: ', reward)
                print('facing:', state1['facing'])
                print('position:', state1['position'])
                print(state1['view'][::-1, 1, :])

        # Reset env
        env.reset()
Code Example #12
def test_grayscale():
    env = envs.make('Pong-v0')
    env.env._obs_type = 'grayscale_image'
    ob = env.reset()
    assert ob.shape == (210, 160, 1)
    ob = env.render('grayscale_array')
    assert ob.shape == (210, 160, 1)
Code Example #13
def test_env_version_suggestions(register_some_envs, env_id_input,
                                 suggested_versions, default_version):
    if default_version:
        match_str = "provides the default version"
        with pytest.raises(
                error.DeprecatedEnv,
                match=match_str,
        ):
            envs.make(env_id_input)
    else:
        match_str = f"versioned environments: \\[ {suggested_versions} \\]"
        with pytest.raises(
                error.UnregisteredEnv,
                match=match_str,
        ):
            envs.make(env_id_input)
Code Example #14
File: test_registration.py Project: jiapei100/gym
def test_make_with_kwargs():
    env = envs.make('test.ArgumentEnv-v0', arg2='override_arg2', arg3='override_arg3')
    assert env.spec.id == 'test.ArgumentEnv-v0'
    assert isinstance(env.unwrapped, ArgumentEnv)
    assert env.arg1 == 'arg1'
    assert env.arg2 == 'override_arg2'
    assert env.arg3 == 'override_arg3'
Code Example #15
def test_serialize_deserialize():
    env1 = envs.make('HandReach-v0', distance_threshold=1e-6)
    env1.reset()
    env2 = pickle.loads(pickle.dumps(env1))

    assert env1.distance_threshold == env2.distance_threshold, (
        env1.distance_threshold, env2.distance_threshold)
Code Example #16
def test_serialize_deserialize(environment_id):
    env1 = envs.make(environment_id, target_position='fixed')
    env1.reset()
    env2 = pickle.loads(pickle.dumps(env1))

    assert env1.target_position == env2.target_position, (env1.target_position,
                                                          env2.target_position)
Code Example #17
def test_make_with_kwargs():
    env = envs.make('test.ArgumentEnv-v0',
                    arg2='override_arg2',
                    arg3='override_arg3')
    assert env.spec.id == 'test.ArgumentEnv-v0'
    assert isinstance(env.unwrapped, ArgumentEnv)
    assert env.arg1 == 'arg1'
    assert env.arg2 == 'override_arg2'
    assert env.arg3 == 'override_arg3'
Code Example #18
def test_serialize_deserialize(environment_id):
    env = envs.make(environment_id)
    env.reset()

    with pytest.raises(ValueError, match="Action dimension mismatch"):
        env.step([0.1])

    with pytest.raises(ValueError, match="Action dimension mismatch"):
        env.step(0.1)
Code Example #19
File: test_adserver.py Project: wizcap/gym-adserver
def test_environment_reset():
    # Arrange 
    env = envs.make('AdServer-v0', num_ads=2, time_series_frequency=10)

    # Act
    (ads, impressions, clicks) = env.reset('Test')

    # Assert
    assert clicks == 0
    assert impressions == 0
    assert ads == [Ad(0), Ad(1)]
Code Example #20
File: test_adserver.py Project: wizcap/gym-adserver
def test_environment_step_with_reward():
    # Arrange
    env = envs.make('AdServer-v0', num_ads=2, time_series_frequency=10, reward_policy=lambda x: 1)
    env.reset(scenario_name='Test')

    # Act
    ((ads, impressions, clicks), reward, done, info) = env.step(1)

    # Assert
    assert clicks == 1
    assert impressions == 1
    assert info == {}
    assert reward == 1
    assert not done
    assert ads == [Ad(0), Ad(1, impressions=1, clicks=1)]
Code Example #21
def make_envs_by_action_space_type(spec_list: List[EnvSpec],
                                   action_space: Space):
    """Make environments of specific action_space type.

    This function returns a filtered list of environment from the
    spec_list that matches the action_space type.

    Args:
        spec_list (list): list of registered environments' specification
        action_space (gym.spaces.Space): action_space type
    """
    filtered_envs = []
    for spec in spec_list:
        env = envs.make(spec.id)
        if isinstance(env.action_space, action_space):
            filtered_envs.append(env)
    return filtered_envs
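
Note: a minimal usage sketch for the helper above (not part of the original example; assumes the older gym registry API where `envs.registry.all()` yields the registered `EnvSpec`s):

from typing import List

from gym import envs, spaces
from gym.envs.registration import EnvSpec

# Hypothetical usage: build every registered environment and keep only those
# with a Discrete action space. Instantiating each env can be slow and may
# fail for environments whose dependencies are not installed.
all_specs: List[EnvSpec] = list(envs.registry.all())  # assumption: older registry API
discrete_envs = make_envs_by_action_space_type(all_specs, spaces.Discrete)
print(len(discrete_envs))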
Code Example #22
def test_box_actions_out_of_bound(env, seed):
    """Test out of bound actions in Box action_space.

    Environments with Box actions spaces perform clipping inside `step`.
    The expected behaviour is that an action `out-of-bound` has the same effect
    of an action with value exactly at the upper (or lower) bound.

    Args:
        env (gym.Env): the gym environment
        seed (int): seed value for determinism
    """
    OOB_VALUE = 100

    env.reset(seed=seed)

    oob_env = envs.make(env.spec.id)
    oob_env.reset(seed=seed)

    dtype = env.action_space.dtype

    upper_bounds = env.action_space.high
    lower_bounds = env.action_space.low

    for i, (is_upper_bound, is_lower_bound) in enumerate(
            zip(env.action_space.bounded_above,
                env.action_space.bounded_below)):
        if is_upper_bound:
            obs, _, _, _ = env.step(upper_bounds)
            oob_action = upper_bounds.copy()
            oob_action[i] += np.cast[dtype](OOB_VALUE)

            assert oob_action[i] > upper_bounds[i]
            oob_obs, _, _, _ = oob_env.step(oob_action)

            assert np.alltrue(obs == oob_obs)

        if is_lower_bound:
            obs, _, _, _ = env.step(lower_bounds)
            oob_action = lower_bounds.copy()
            oob_action[i] -= np.cast[dtype](OOB_VALUE)

            assert oob_action[i] < lower_bounds[i]
            oob_obs, _, _, _ = oob_env.step(oob_action)

            assert np.alltrue(obs == oob_obs)
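
Note: the equivalence this test checks can be sketched with plain NumPy clipping (illustrative only; the actual clipping is performed inside each environment's `step`):

import numpy as np
from gym import spaces

# Sketch of the assumed behaviour: an out-of-bound action is treated as if it
# had been clipped to the Box bounds before being applied.
box = spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)
oob_action = np.array([100.0, -100.0, 0.5], dtype=np.float32)
clipped = np.clip(oob_action, box.low, box.high)
print(clipped)  # [ 1. -1.  0.5] -- same effect as stepping with the raw value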
Code Example #23
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--raw_actions', action='store_true')
    args = parser.parse_args()

    env = envs.make('Go9x9-v0')
    env.reset()
    while True:
        s = env._state
        env._render()

        colorstr = pachi_py.color_to_str(s.color)
        if args.raw_actions:
            a = int(raw_input('{} (raw)> '.format(colorstr)))
        else:
            coordstr = raw_input('{}> '.format(colorstr))
            a = go.str_to_action(s.board, coordstr)

        _, r, done, _ = env.step(a)
        if done:
            break

    print('You win!' if r > 0 else 'Opponent wins!')
    print('Final score:', env._state.board.official_score)
Code Example #24
File: main.py Project: ceobillionaire/trpo
        env = self.env
        ret = []
        for o, r, d in zip(observation_n, reward_n, done_n):
            o = env.observation_convert(o, env._env.observation_space, env.observation_space)  
            obs = np.expand_dims(o, 0)
            action_dist_n = self.session.run(self.action_dist_n, {self.obs: obs})
            action = int(np.argmax(action_dist_n, 1)[0])
            action = env.action_convert(action, env.action_space, env._env.action_space)
            ret.append(action)
        return ret


experiment_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)
print ("taks = {}".format(args.task))
env = envs.make(args.task)


env.monitor.start(experiment_dir)

agent = ContinTRPOAgent(env)
agent.learn()
env.monitor.close()
gym.upload(experiment_dir, algorithm_id=algo)


print (experiment_dir)

from sys import argv
print ('python {}'.format(' '.join(argv)))
Code Example #25
def test_make():
    env = envs.make("CartPole-v0")
    assert env.spec.id == "CartPole-v0"
    assert isinstance(env.unwrapped, cartpole.CartPoleEnv)
Code Example #26
File: main.py Project: wojzaremba/trpo
                stats["KL between old and new distribution"] = kloldnew
                stats["Surrogate loss"] = surrafter
                for k, v in stats.iteritems():
                    print(k + ": " + " " * (40 - len(k)) + str(v))
                if entropy != entropy:
                    exit(-1)
                if exp > 0.8:
                    self.train = False
            i += 1

training_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

if len(sys.argv) > 1:
    task = sys.argv[1]
else:
    task = "RepeatCopy-v0"

env = envs.make(task)
env.monitor.start(training_dir)

env = SpaceConversionEnv(env, Box, Discrete)

agent = TRPOAgent(env)
agent.learn()
env.monitor.close()
gym.upload(training_dir,
           algorithm_id='trpo_ff')


Code Example #27
File: run_cem.py Project: BonsaiAI/modular_rl
from .modular_rl import *
import argparse, sys, pickle, shutil
import gym, logging

from tabulate import tabulate

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    update_argument_parser(parser, GENERAL_OPTIONS)
    parser.add_argument("--env", required=True)
    parser.add_argument("--agent", required=True)
    parser.add_argument("--plot", action="store_true")
    args, _ = parser.parse_known_args(
        [arg for arg in sys.argv[1:] if arg not in ('-h', '--help')])
    env = make(args.env)
    env_spec = env.spec
    mondir = args.outfile + ".dir"
    if os.path.exists(mondir): shutil.rmtree(mondir)
    os.mkdir(mondir)
    env.monitor.start(mondir,
                      video_callable=None if args.video else VIDEO_NEVER)
    agent_ctor = get_agent_cls(args.agent)
    update_argument_parser(parser, agent_ctor.options)
    update_argument_parser(parser, CEM_OPTIONS)
    args = parser.parse_args()
    cfg = args.__dict__
    agent = agent_ctor(env.observation_space, env.action_space, cfg)
    np.random.seed(args.seed)
    hdf, diagnostics = prepare_h5_file(args)
Code Example #28
File: realtime_sim.py Project: SergiPonsa/Mujoco_TFM
                        type=str,
                        help="name of the environment. Options: Gen3-v0")
    parser.add_argument("--mode",
                        choices=["noop", "random", "human"],
                        default="random",
                        help="mode of the agent")
    parser.add_argument("--max_steps",
                        type=int,
                        default=0,
                        help="maximum episode length")
    parser.add_argument("--fps", type=float)
    parser.add_argument("--once", action="store_true")
    parser.add_argument("--ignore_done", action="store_true")
    args = parser.parse_args()

    env = envs.make(args.env)
    action_space = env.action_space
    mode = args.mode
    fps = args.fps or env.metadata.get('video.frames_per_second') or 100
    if args.max_steps == 0:
        args.max_steps = env.spec.tags[
            'wrapper_config.TimeLimit.max_episode_steps']
        print("max_steps = ", args.max_steps)

    print("Press ESC to quit")
    reward = 0
    done = False
    if mode == "random":
        agent = RandomAgent(action_space)
    elif mode == "noop":
        agent = NoopAgent(action_space)
Code Example #29
import numpy as np

import gym
from gym import spaces, envs

gym.undo_logger_setup()
import logging; logging.getLogger('gym.core').addHandler(logging.NullHandler())

num_trials = 50

print 'Name & Random policy performance'

names = ['CartPole-v0', 'Acrobot-v0', 'MountainCar-v0', 'Reacher-v1', 'HalfCheetah-v1', 'Hopper-v1', 'Walker2d-v1', 'Ant-v1', 'Humanoid-v1']
for env_name in names:
    env = envs.make(env_name)

    returns = []
    for _ in xrange(num_trials):
        env.reset()
        ret = 0.
        for _ in xrange(env.spec.timestep_limit):
            _, r, done, _ = env.step(env.action_space.sample())
            ret += r
            if done: break
        returns.append(ret)

    print '{} & {} \pm {}'.format(env_name, np.mean(returns), np.std(returns))
Code Example #30
File: kerlym.py Project: chemouda/kerlym
parser.add_option("-d", "--discount", dest="discount", default=0.99, type='float',      help="Discount rate for future reards [%default]")
parser.add_option("-t", "--num_frames", dest="nframes", default=2, type='int',          help="Number of Sequential observations/timesteps to store in a single example [%default]")
parser.add_option("-m", "--max_mem", dest="maxmem", default=100000, type='int',         help="Max number of samples to remember [%default]")
parser.add_option("-P", "--plots", dest="plots", action="store_true", default=False,    help="Plot learning statistics while running [%default]")
parser.add_option("-F", "--plot_rate", dest="plot_rate", default=10, type='int',        help="Plot update rate in episodes [%default]")
parser.add_option("-S", "--submit", dest="submit", action="store_true", default=False,  help="Submit Results to OpenAI [%default]")
parser.add_option("-a", "--agent", dest="agent", default="ddqn",                        help="Which learning algorithm to use [%default]")
(options, args) = parser.parse_args()

print options.agent

training_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

from gym import envs
env = envs.make(options.env)
if options.submit:
    env.monitor.start(training_dir)

import dqn
agent_constructor = {
    "dqn":dqn.DQN,
    "ddqn":dqn.D2QN
}[options.agent]

agent = agent_constructor(env, nframes=options.nframes, epsilon=options.epsilon, discount=options.discount, modelfactory=eval("dqn.%s"%(options.net)),
                    epsilon_schedule=lambda episode,epsilon: max(0.05, epsilon*(1-options.epsilon_decay)),
                    update_nsamp=options.update_size, batch_size=options.bs, dropout=options.dropout,
                    timesteps_per_batch=options.update_freq, stats_rate=options.plot_rate,
                    enable_plots = options.plots, max_memory = options.maxmem )
agent.learn()
Code Example #31
                  default=False,
                  help="Submit Results to OpenAI [%default]")
parser.add_option("-a",
                  "--agent",
                  dest="agent",
                  default="ddqn",
                  help="Which learning algorithm to use [%default]")
(options, args) = parser.parse_args()

print options.agent

training_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

from gym import envs
env = envs.make(options.env)
if options.submit:
    env.monitor.start(training_dir)

import dqn
agent_constructor = {"dqn": dqn.DQN, "ddqn": dqn.D2QN}[options.agent]

agent = agent_constructor(env,
                          nframes=options.nframes,
                          epsilon=options.epsilon,
                          discount=options.discount,
                          modelfactory=eval("dqn.%s" % (options.net)),
                          epsilon_schedule=lambda episode, epsilon: max(
                              0.05, epsilon * (1 - options.epsilon_decay)),
                          update_nsamp=options.update_size,
                          batch_size=options.bs,
Code Example #32
File: main.py Project: zzmjohn/trpo
        ret = []
        for o, r, d in zip(observation_n, reward_n, done_n):
            o = env.observation_convert(o, env._env.observation_space,
                                        env.observation_space)
            obs = np.expand_dims(o, 0)
            action_dist_n = self.session.run(self.action_dist_n,
                                             {self.obs: obs})
            action = int(np.argmax(action_dist_n, 1)[0])
            action = env.action_convert(action, env.action_space,
                                        env._env.action_space)
            ret.append(action)
        return ret


experiment_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)
print("taks = {}".format(args.task))
env = envs.make(args.task)

env.monitor.start(experiment_dir)

agent = ContinTRPOAgent(env)
agent.learn()
env.monitor.close()
gym.upload(experiment_dir, algorithm_id=algo)

print(experiment_dir)

from sys import argv
print('python {}'.format(' '.join(argv)))
Code Example #33
File: test_registration.py Project: tima04/gym
def test_make():
    env = envs.make("CartPole-v0")
    assert env.spec.id == "CartPole-v0"
    assert isinstance(env, cartpole.CartPoleEnv)
Code Example #34
    def __init__(self,
                 experiment="Breakout-v0",
                 env=None,
                 nthreads=16,
                 nframes=1,
                 epsilon=0.5,
                 enable_plots=False,
                 render=False,
                 learning_rate=1e-4,
                 modelfactory=networks.simple_cnn,
                 difference_obs=True,
                 preprocessor=preproc.karpathy_preproc,
                 discount=0.99,
                 batch_size=32,
                 epsilon_min=0.05,
                 epsilon_schedule=None,
                 stats_rate=10,
                 **kwargs):
        self.kwargs = kwargs
        self.experiment = experiment
        if env == None:
            env = lambda: envs.make(self.experiment)
        self.nthreads = nthreads
        self.env = map(lambda x: env(), range(0, self.nthreads))
        self.model_factory = modelfactory
        self.nframes = nframes
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_schedule = epsilon_schedule
        self.gamma = discount
        self.preprocessor = preprocessor
        self.difference_obs = difference_obs
        self.network_update_frequency = batch_size
        self.target_network_update_frequency = 10000
        self.T = 0
        self.TMAX = 80000000
        self.checkpoint_interval = 600
        self.checkpoint_dir = "/tmp/"
        self.enable_plots = enable_plots
        self.stats_rate = stats_rate
        self.ipy_clear = False
        self.next_plot = 0
        self.e = 0
        self.render = render

        self.render_rate_hz = 5.0
        self.render_ngames = 2
        self.plot_q = Queue.Queue()

        # set up output shape to be either pre-processed or not
        if not self.preprocessor == None:
            print(self.env[0].observation_space.shape)
            o = self.preprocessor(np.zeros(
                self.env[0].observation_space.shape))
            self.input_dim_orig = [self.nframes] + list(o.shape)
        else:
            self.input_dim_orig = [self.nframes] + list(
                self.env[0].observation_space.shape)
        self.input_dim = np.product(self.input_dim_orig)
        print(self.input_dim, self.input_dim_orig)

        # set up plotting storage
        self.stats = None
        if self.enable_plots:
            self.stats = {
                "tr": statbin(self.stats_rate),  # Total Reward
                "ft": statbin(self.stats_rate),  # Finishing Time
                "minvf": statbin(self.stats_rate),  # Min Value Fn
                "maxvf": statbin(self.stats_rate),  # Min Value Fn
                "cost": statbin(self.stats_rate),  # Loss
            }

        # set up the TF session
        self.session = tf.Session()
        K.set_session(self.session)
        self.setup_graphs()
        self.saver = tf.train.Saver()
Code Example #35
    def __init__(self, env_name):
        self.env = envs.make(env_name)
        self.action_space = self.env.action_space
        self.curr_obs = self.env.reset()
        self.is_done = False
Code Example #36
import gym
from gym import spaces, envs
import argparse
import numpy as np
import itertools
import time
from builtins import input
import random

from mujoco_py.modder import TextureModder, MaterialModder
import cv2

from functions_mpi import *



comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
number_to_experiment = [1,3,5,107,109,111]

if rank == 0:
    print("Hello I am the master rank", str(rank), "of", str(size))
    env = envs.make("FetchSlide-v1")
    MasterProgramCrossEntropy(env,size,comm)
else:
    print("Hello I am the slave rank", str(rank), "of", str(size))
    env = envs.make("FetchSlide-v1")
    #SlaveProgram(rank,env)
    SlaveProgramCrossEntropyExperimentReward(rank,env,comm,number_to_experiment )
Code Example #37
File: run_pg.py Project: domluna/ml_p5_capstone
import argparse, sys, cPickle
from tabulate import tabulate
import shutil, os, logging
import gym
import numpy as np


if __name__ == "__main__":
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    update_argument_parser(parser, GENERAL_OPTIONS)
    parser.add_argument("--env", required=True)
    parser.add_argument("--agent", required=True)
    parser.add_argument("--plot", action="store_true")
    args, _ = parser.parse_known_args([arg for arg in sys.argv[1:] if arg not in ('-h', '--help')])

    env = make(args.env)
    env_spec = env.spec

    mondir = args.outfile + ".dir"
    if os.path.exists(mondir): shutil.rmtree(mondir)
    os.mkdir(mondir)
    env.monitor.start(mondir, video_callable=None if args.video else VIDEO_NEVER)
    agent_ctor = get_agent_cls(args.agent)
    update_argument_parser(parser, agent_ctor.options)
    args = parser.parse_args()
    if args.timestep_limit == 0:
        args.timestep_limit = env_spec.timestep_limit
    cfg = args.__dict__
    np.random.seed(args.seed)

    print env.observation_space, env.action_space
Code Example #38
def test_env_suggestions(register_some_envs, env_id_input, env_id_suggested):
    with pytest.raises(error.UnregisteredEnv,
                       match=f"Did you mean: `{env_id_suggested}` ?"):
        envs.make(env_id_input)
Code Example #39
File: run.py Project: AllanYiin/CNTK
                        'random number generator. Negative value is ignored.')
    args = parser.parse_args()

    if (args.seed >= 0):
        np.random.seed(args.seed)

    # Use xrange for python 2.7 to speed up.
    if sys.version_info.major < 3:
        range = xrange

    # Create an OpenAI Gym environment, and obtain its state/action
    # information.
    if args.env not in envs.registry.env_specs.keys():
        # Try to find from local environment libraries.
        env_factory.register_env(args.env)
    env = envs.make(args.env)
    o_space = env.observation_space
    a_space = env.action_space
    image_observation = True if isinstance(
        env.env, AtariEnv) and env.env._obs_type == 'image' else False
    print("Loaded environment '{0}'".format(args.env))
    print("Observation space: '{0}'".format(o_space))
    print("Action space: '{0}'".format(a_space))
    print('Is observation an image: {0}'.format(image_observation))

    if args.max_episode_steps <= 0:
        args.max_episode_steps = \
            env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']

    # Create an agent.
    agent = agent_factory.make_agent(args.agent_config,
Code Example #40
import gym
from gym import spaces, envs

gym.undo_logger_setup()
import logging; logging.getLogger('gym.core').addHandler(logging.NullHandler())

names = ['CartPole-v0', 'Acrobot-v0', 'MountainCar-v0', 'Reacher-v1', 'HalfCheetah-v1', 'Hopper-v1', 'Walker2d-v1', 'Ant-v1', 'Humanoid-v1']
for n in names:
    env = envs.make(n)

    aspace = env.action_space
    if isinstance(aspace, spaces.Box):
        acont = True
        asize = aspace.low.shape[0]
    else:
        acont = False
        asize = aspace.n

    ospace = env.observation_space
    if isinstance(ospace, spaces.Box):
        ocont = True
        osize = ospace.low.shape[0]
    else:
        ocont = False
        osize = ospace.n

    print '{} & {} ({}) & {} ({}) \\\\'.format(n, osize, 'continuous' if ocont else 'discrete', asize, 'continuous' if acont else 'discrete')
Code Example #41
                    print(k + ": " + " " * (40 - len(k)) + str(v))
                if entropy != entropy:
                    exit(-1)
                if exp > 0.8:
                    self.train = False
            i += 1


training_dir = tempfile.mkdtemp()
logging.getLogger().setLevel(logging.DEBUG)

# if len(sys.argv) > 1:
#     print(9284)
#     task = sys.argv[1]
# else:
#     print(8966)
#     task = "RepeatCopy-v0"

task = "CartPole-v0"
# task = "MountainCar-v0"

env = envs.make(task)
# env.monitor.start(training_dir)

# env = SpaceConversionEnv(env, Box, Discrete)

agent = TRPOAgent(env)
agent.learn()
env.monitor.close()
gym.upload(training_dir, algorithm_id='trpo_ff')
Code Example #42
def test_return_latest_versioned_env(register_some_envs):
    with pytest.warns(UserWarning):
        env = envs.make("MyAwesomeNamespace/MyAwesomeVersionedEnv")
    assert env.spec.id == "MyAwesomeNamespace/MyAwesomeVersionedEnv-v5"
Code Example #43
import gym
from gym import spaces, envs

gym.undo_logger_setup()
import logging
logging.getLogger('gym.core').addHandler(logging.NullHandler())

names = [
    'CartPole-v0', 'Acrobot-v0', 'MountainCar-v0', 'Reacher-v1',
    'HalfCheetah-v1', 'Hopper-v1', 'Walker2d-v1', 'Ant-v1', 'Humanoid-v1'
]
for n in names:
    env = envs.make(n)

    aspace = env.action_space
    if isinstance(aspace, spaces.Box):
        acont = True
        asize = aspace.low.shape[0]
    else:
        acont = False
        asize = aspace.n

    ospace = env.observation_space
    if isinstance(ospace, spaces.Box):
        ocont = True
        osize = ospace.low.shape[0]
    else:
        ocont = False
        osize = ospace.n

    print '{} & {} ({}) & {} ({}) \\\\'.format(
Code Example #44
File: dqn.py Project: ChenglongChen/kerlym
    def __init__(
        self,
        experiment="Breakout-v0",
        env=None,
        nthreads=16,
        nframes=1,
        epsilon=0.5,
        enable_plots=False,
        render=False,
        learning_rate=1e-4,
        modelfactory=networks.simple_cnn,
        difference_obs=True,
        preprocessor=preproc.karpathy_preproc,
        discount=0.99,
        batch_size=32,
        epsilon_min=0.05,
        epsilon_schedule=None,
        stats_rate=10,
        **kwargs
    ):
        self.kwargs = kwargs
        self.experiment = experiment
        if env == None:
            env = lambda: envs.make(self.experiment)
        self.nthreads = nthreads
        self.env = map(lambda x: env(), range(0, self.nthreads))
        self.model_factory = modelfactory
        self.nframes = nframes
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_schedule = epsilon_schedule
        self.gamma = discount
        self.preprocessor = preprocessor
        self.difference_obs = difference_obs
        self.network_update_frequency = batch_size
        self.target_network_update_frequency = 10000
        self.T = 0
        self.TMAX = 80000000
        self.checkpoint_interval = 600
        self.checkpoint_dir = "/tmp/"
        self.enable_plots = enable_plots
        self.stats_rate = stats_rate
        self.ipy_clear = False
        self.next_plot = 0
        self.e = 0
        self.render = render

        self.render_rate_hz = 5.0
        self.render_ngames = 2
        self.plot_q = Queue.Queue()

        # set up output shape to be either pre-processed or not
        if not self.preprocessor == None:
            print self.env[0].observation_space.shape
            o = self.preprocessor(np.zeros(self.env[0].observation_space.shape))
            self.input_dim_orig = [self.nframes] + list(o.shape)
        else:
            self.input_dim_orig = [self.nframes] + list(self.env[0].observation_space.shape)
        self.input_dim = np.product(self.input_dim_orig)
        print self.input_dim, self.input_dim_orig

        # set up plotting storage
        self.stats = None
        if self.enable_plots:
            self.stats = {
                "tr": statbin(self.stats_rate),  # Total Reward
                "ft": statbin(self.stats_rate),  # Finishing Time
                "minvf": statbin(self.stats_rate),  # Min Value Fn
                "maxvf": statbin(self.stats_rate),  # Min Value Fn
                "cost": statbin(self.stats_rate),  # Loss
            }

        # set up the TF session
        self.session = tf.Session()
        K.set_session(self.session)
        self.setup_graphs()
        self.saver = tf.train.Saver()
Code Example #45
File: debug.py Project: analog-rl/modular_rl
from modular_rl import *
import argparse, sys, cPickle
from tabulate import tabulate
import shutil, os, logging
import gym


mondir = "tmp.dir"
env = 'CartPole-v0'
video = False
agent = 'modular_rl.agentzoo.TrpoAgent'
seed = 7
use_hdf = False


env = make(env)
os.mkdir(mondir)
env.monitor.start(mondir, video_callable=None if video else VIDEO_NEVER)
agent_ctor = get_agent_cls(agent)
# update_argument_parser(parser, agent_ctor.options)
# if args.timestep_limit == 0:
#     args.timestep_limit = env_spec.timestep_limit
# cfg = args.__dict__
np.random.seed(seed)
agent = agent_ctor(env.observation_space, env.action_space, [])
# if use_hdf:
#     hdf, diagnostics = prepare_h5_file(args)
gym.logger.setLevel(logging.WARN)
timestep_limit
COUNTER = 0
Code Example #46
File: test_registration.py Project: AniChikage/gym
def test_make():
    env = envs.make('CartPole-v0')
    assert env.spec.id == 'CartPole-v0'
    assert isinstance(env, cartpole.CartPoleEnv)