Example #1
def test_default_lookups():
    register("test/Test3")

    with pytest.raises(error.DeprecatedEnv):
        spec("test/Test3-v0")

    # Lookup default
    spec("test/Test3")
Example #2
def register_custom_envs():
    for key, value in custom_envs.items():
        arg_dict = dict(id=key,
                        entry_point=value['path'],
                        max_episode_steps=value['max_episode_steps'],
                        kwargs=value['kwargs'])
        if 'reward_threshold' in value:
            arg_dict['reward_threshold'] = value['reward_threshold']
        register(**arg_dict)
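For reference, this loop assumes each value in custom_envs is a dict shaped roughly like the sketch below (names and values are hypothetical, not from the source):

custom_envs = {
    'MyGridWorld-v0': {                          # hypothetical env id
        'path': 'my_package.envs:MyGridWorld',   # becomes entry_point
        'max_episode_steps': 200,
        'kwargs': {'size': 8},
        'reward_threshold': 0.9,                 # optional; forwarded only if present
    },
}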
Example #4
def register_envs(dependency_labels_file):
    register(
        id='ArcStandardTransitionEnv-v0',
        entry_point='deeprl.dp_envs:ArcStandardTransitionEnv',
        kwargs={'dependency_labels_file': dependency_labels_file})

    register(
        id='ArcEagerTransitionEnv-v0',
        entry_point='deeprl_hw1.queue_envs:QueueEnv',
        kwargs={'dependency_labels_file': dependency_labels_file})
Example #5
def test_register(env_id, namespace, name, version):
    register(env_id)
    assert gym.envs.spec(env_id).id == env_id
    full_name = f"{name}"
    if namespace:
        full_name = f"{namespace}/{full_name}"
    if version is not None:
        full_name = f"{full_name}-v{version}"
    assert full_name in gym.envs.registry.keys()
    del gym.envs.registry[env_id]
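The four fixture arguments suggest this test is driven by a parametrization along these lines (a sketch; the exact cases are assumptions):

@pytest.mark.parametrize(
    "env_id, namespace, name, version",
    [
        ("MyNamespace/MyEnv-v0", "MyNamespace", "MyEnv", 0),
        ("MyEnv-v0", None, "MyEnv", 0),
        ("MyEnv", None, "MyEnv", None),
    ],
)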
Example #6
def test_versioned_lookups():
    register("test/Test2-v5")

    with pytest.raises(error.VersionNotFound):
        spec("test/Test2-v9")

    with pytest.raises(error.DeprecatedEnv):
        spec("test/Test2-v4")

    assert spec("test/Test2-v5")
Example #7
def test_namespace():
    # Check if the namespace context manager works
    with registration.namespace("MyDefaultNamespace"):
        register("MyDefaultEnvironment-v0")
    register("MyDefaultEnvironment-v1")
    assert "MyDefaultNamespace/MyDefaultEnvironment-v0" in registry
    assert "MyDefaultEnvironment-v1" in registry

    del registry["MyDefaultNamespace/MyDefaultEnvironment-v0"]
    del registry["MyDefaultEnvironment-v1"]
Example #8
def register_custom_envs():
    global _REGISTERED
    if _REGISTERED:
        return
    _REGISTERED = True

    LOGGER.info("Registering custom gym environments")
    register(id='PointMazeRight-v0',
             entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv',
             kwargs={
                 'sparse_reward': False,
                 'direction': 1,
                 'discrete': True
             })
    register(id='PointMazeLeft-v0',
             entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv',
             kwargs={
                 'sparse_reward': False,
                 'direction': 0,
                 'discrete': True
             })
    register(id='PointMazeRightCont-v0',
             entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv',
             kwargs={
                 'sparse_reward': False,
                 'direction': 1,
                 'discrete': False
             })
    register(id='PointMazeLeftCont-v0',
             entry_point='inverse_rl.envs.point_maze_env:PointMazeEnv',
             kwargs={
                 'sparse_reward': False,
                 'direction': 0,
                 'discrete': False
             })
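Because of the module-level _REGISTERED guard, repeated calls are no-ops, so callers can presumably invoke the function defensively before every gym.make:

register_custom_envs()  # first call registers all four PointMaze variants
register_custom_envs()  # subsequent calls return immediately
env = gym.make('PointMazeRight-v0')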
Example #9
def make_env_Rubik(**kwargs):
    id = ("Rubik-" + str(kwargs) + "-v0").translate(
        str.maketrans('', '', " {}'<>()_"))
    id = id.replace(',', '-')

    try:
        register(id=id, entry_point='gym_rubik.envs:RubikEnv', kwargs=kwargs)
        print("Registered environment with id = " + id)
    except gym.error.Error:  # narrowed from a bare except; gym raises Error on duplicate ids
        print("Environment with id = " + id +
              " already registered. Continuing with that environment.")

    env = gym.make(id)

    return env
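To illustrate the id sanitization: a hypothetical call with two kwargs stringifies the dict, strips spaces, braces, quotes, and underscores, then turns commas into dashes:

env = make_env_Rubik(scramble=5, obs='flat')  # hypothetical kwargs
# str(kwargs) -> "{'scramble': 5, 'obs': 'flat'}"
# resulting id -> "Rubik-scramble:5-obs:flat-v0"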
Example #10
def register_from_string(env_id, class_=None, **kwargs):
    if class_ == "SubtasksGridWorld":
        class_ = SubtasksGridWorld
    elif "random" in kwargs:
        class_ = RandomGridWorld
    else:
        class_ = GridWorld

    register(
        id=env_id,
        entry_point=f"{class_.__module__}:{class_.__name__}",
        reward_threshold=kwargs.pop("reward_threshold", None),
        max_episode_steps=kwargs.pop("max_episode_steps", None),
        nondeterministic=False,
        kwargs=kwargs,
    )
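Note that reward_threshold and max_episode_steps are popped into the spec itself, while everything left in kwargs reaches the environment constructor. A hypothetical call:

register_from_string(
    "MyGrid-v0",              # hypothetical id
    max_episode_steps=100,    # consumed by the spec, not the constructor
    random=True,              # selects RandomGridWorld and is also forwarded in kwargs
    size=8,                   # forwarded to the constructor
)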
Example #11
def register_env(env_id):
    if env_id == 'Maze2D-v0':
        envs.register(id=env_id,
                      entry_point='env:maze2d.Maze2D',
                      kwargs={},
                      max_episode_steps=200,
                      reward_threshold=-110.0)
    elif env_id == '3linkarm-v0':
        envs.register(id=env_id,
                      entry_point='threelinkarm:ThreelinkArm',
                      kwargs={},
                      max_episode_steps=200,
                      reward_threshold=-100.0)
    else:
        raise ValueError('Cannot find environment "{0}"\n'.format(env_id))
    return True
Example #12
def make_env_BitFlipper(n=10, space_seed=0):
    id = "BitFlipper" + str(n) + ":" + str(space_seed) + "-v0"
    try:
        register(id=id,
                 entry_point='gym_BitFlipper.envs:BitFlipperEnv',
                 kwargs={
                     "space_seed": space_seed,
                     "n": n
                 })
    except gym.error.Error:  # narrowed from a bare except; gym raises Error on duplicate ids
        print("Environment with id = " + id +
              " already registered. Continuing with that environment.")

    env = gym.make(id)
    env.seed(0)

    return env
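For example, make_env_BitFlipper(n=8, space_seed=42) builds and registers the id "BitFlipper8:42-v0" on first use:

env = make_env_BitFlipper(n=8, space_seed=42)  # gym.make("BitFlipper8:42-v0") under the hood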
Example #13
def test_default_time_limit():
    # We need an env without a default limit
    register(
        id='test.NoLimitDummyVNCEnv-v0',
        entry_point='universe.envs:DummyVNCEnv',
        tags={
            'vnc': True,
        },
    )

    env = gym.make('test.NoLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()

    assert env._max_episode_seconds == wrappers.time_limit.DEFAULT_MAX_EPISODE_SECONDS
    assert env._max_episode_steps is None
Example #14
def test_default_time_limit():
    # We need an env without a default limit
    register(
        id='test.NoLimitDummyVNCEnv-v0',
        entry_point='universe.envs:DummyVNCEnv',
        tags={
            'vnc': True,
            },
    )

    env = gym.make('test.NoLimitDummyVNCEnv-v0')
    env = wrappers.TimeLimit(env)
    env.configure(_n=1)
    env.reset()

    assert env._max_episode_seconds == wrappers.time_limit.DEFAULT_MAX_EPISODE_SECONDS
    assert env._max_episode_steps is None
Example #15
def register_env(env_id):
    if env_id == 'Maze2D-v0':
        envs.register(
            id=env_id,
            entry_point='env:maze2d.Maze2D',
            kwargs={},
            max_episode_steps=200,
            reward_threshold=-110.0)
    elif env_id == 'PuddleWorld-v0':
        envs.register(
            id=env_id,
            entry_point='env:puddleworld.PuddleWorld',
            kwargs={},
            max_episode_steps=200,
            reward_threshold=-100.0)
    else:
        raise ValueError('Cannot find environment "{0}"\n'.format(env_id))
    return True
Example #16
    def test_cache(self):
        np.random.seed(123)
        # Raw string keeps the Windows-style backslash path intact
        with open(r"..\query_pull_1000v3.pkl", 'rb') as f:
            query_pull = pickle.load(f)
            register(
                id='DatabaseIndexesEnv-v0',
                entry_point='dbenv:DatabaseIndexesEnv',
                kwargs={'n': const.COLUMNS_AMOUNT,
                        'table_name': "test_table",
                        'query_pull': query_pull,
                        'batch_size': 2,
                        'connector': PostgresConnector(),
                        'k': 3,
                        'max_episodes': 1}
            )
            env = gym.make('DatabaseIndexesEnv-v0')
            env.step(0)
            env.step(1)
            env.step(2)
            print(env.cache)
Example #17
    def env_creator():
        # This is awkward, but gym adds some magic required for wrappers
        env_creator = get_env_creator(
            env_callable_name="gym_sokoban.envs:SokobanEnv", seed=seed)

        register("BareEnv-v0", entry_point=env_creator)
        env = gym.make("BareEnv-v0")

        curriculum_setter = curriculum_setter_fn(env)
        env = EpisodeHistoryCallbackWrapper(env, [
            EpisodeHistorySummarizer(
                summary_helper, curriculum_setter, freq=20)
        ])
        if video_directory:
            env = VideoRecorderWrapper(
                env,
                directory=video_directory,
                record_video_trigger=record_video_trigger,
                video_length=2000000,
                summary_helper=summary_helper)
        return env
Example #18
def train_model():
    np.random.seed(123)
    with open("query_pull_1000v2.pkl", 'rb') as f:
        query_pull = pickle.load(f)[0:5]

        register(id=ENV_NAME,
                 entry_point='dbenv:DatabaseIndexesEnv',
                 kwargs={
                     'n': COLUMNS_AMOUNT,
                     'table_name': table_name,
                     'query_pull': query_pull,
                     'batch_size': BATCH_SIZE,
                     'connector': PostgresConnector(),
                     'k': 3,
                     'max_episodes': episodes
                 })

        # Get the environment and extract the number of actions.
        env = gym.make(ENV_NAME)
        env.seed(123)

        # Next, we build a very simple model.
        model = build_model()
        print(model.summary())

        # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
        # even the metrics!
        dqn = initialize_agent(model)

        # Okay, now it's time to learn something! We visualize the training here for show, but this
        # slows down training quite a lot. You can always safely abort the training prematurely using
        # Ctrl + C.
        dqn.fit(env, nb_steps=episodes, visualize=False, verbose=2)

        # After training is done, we save the final weights.
        dqn.save_weights('dqn_specific_{}.h5f'.format(ENV_NAME),
                         overwrite=True)

        # Finally, evaluate our algorithm for 5 episodes.
        dqn.test(env, nb_episodes=5, visualize=False)
Example #19
    def _register(self, user_env):
        if user_env['id'].lower() in self.env_ids:
            logger.info(
                'Deregistering the environment %s, because it is already registered.',
                user_env['id'])
            deregister(user_env['id'])
            for i, env in enumerate(self.user_envs):
                if env['id'].lower() == user_env['id'].lower():
                    self.user_envs.pop(i)
                    break

        register_params = [
            'id', 'entry_point', 'timestep_limit', 'trials',
            'reward_threshold', 'local_only', 'kwargs', 'nondeterministic'
        ]
        register_kwargs = {
            k: user_env[k]
            for k in register_params if k in user_env
        }
        register(**register_kwargs)
        self.user_envs.append(user_env)
        self.env_ids.add(user_env['id'].lower())
        return True
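The register_params whitelist implies each user_env is a plain dict carrying any subset of those keys, e.g. (a sketch with hypothetical values):

user_env = {
    'id': 'MyUserEnv-v0',                     # required: also used for case-insensitive dedup
    'entry_point': 'my_pkg.envs:MyUserEnv',   # hypothetical module path
    'timestep_limit': 200,
    'reward_threshold': 100.0,
    'nondeterministic': False,
}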
Example #20
def test_register_versioned_unversioned():
    # Register versioned then unversioned
    versioned_env = "Test/MyEnv-v0"
    register(versioned_env)
    assert gym.envs.spec(versioned_env).id == versioned_env
    unversioned_env = "Test/MyEnv"
    with pytest.raises(error.RegistrationError):
        register(unversioned_env)

    # Clean everything
    del gym.envs.registry[versioned_env]

    # Register unversioned then versioned
    register(unversioned_env)
    assert gym.envs.spec(unversioned_env).id == unversioned_env
    with pytest.raises(error.RegistrationError):
        register(versioned_env)

    # Clean everything
    del gym.envs.registry[unversioned_env]
Example #21
def test_missing_lookup():
    register(id="Test1-v0", entry_point=None)
    register(id="Test1-v15", entry_point=None)
    register(id="Test1-v9", entry_point=None)
    register(id="Other1-v100", entry_point=None)

    with pytest.raises(error.DeprecatedEnv):
        spec("Test1-v1")

    with pytest.raises(error.UnregisteredEnv):
        spec("Test1-v1000")

    with pytest.raises(error.UnregisteredEnv):
        spec("Unknown1-v1")
Example #22
def test_register_versioned_unversioned():
    # Register versioned then unversioned
    versioned_env = "Test/MyEnv-v0"
    envs.register(versioned_env)
    assert gym.envs.spec(versioned_env).id == versioned_env
    unversioned_env = "Test/MyEnv"
    with pytest.raises(error.RegistrationError):
        envs.register(unversioned_env)

    # Clean everything
    del gym.envs.registry.env_specs[versioned_env]

    # Register unversioned then versioned
    with pytest.warns(UserWarning):
        envs.register(unversioned_env)
    assert gym.envs.spec(unversioned_env).id == unversioned_env
    with pytest.raises(error.RegistrationError):
        envs.register(versioned_env)

    # Clean everything
    envs_list = [versioned_env, unversioned_env]
    for env in envs_list:
        del gym.envs.registry.env_specs[env]
Example #23
    def feature_ext(self, state, action=None):
        if action is None:
            out = np.zeros((self.state_size))
            out[state] = 1
        else:
            out = np.zeros((self.state_size * self.action_size))
            out[action * self.state_size + state] = 1
        return out


register(
    id='FrozenLake-simple-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={
        'map_name': '4x4',
        "is_slippery": False
    },
    max_episode_steps=100,
    reward_threshold=0.78,  # optimum = .8196
)
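The deterministic variant registered above would then presumably be created via its custom id (note the __main__ block below builds the stock "FrozenLake-v0" instead):

env = gym.make('FrozenLake-simple-v0')  # non-slippery 4x4 map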

if __name__ == '__main__':

    envs = [gym.make("FrozenLake-v0") for _ in range(1)]
    env = envs[0]
    state_size = env.observation_space.n
    action_size = env.action_space.n
    sess = tf.Session()
    agent = FrozenLakeAgentLSPI(state_size, action_size, 0.9, envs)
    init = tf.global_variables_initializer()
    sess.run(init)
Example #24
import torch
import torch.nn as nn

import gym
from gym.envs import register

from src.mdp.mdp import MDP
from src.policy.policy import Policy, policy_gmm
from src.configs.configs import TORCH_DTYPE, NP_DTYPE
from src.nopg.nopg import NOPG

# Use the GPU if available, or if the memory is insufficient use only the CPU
# DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DEVICE = torch.device('cpu')

##########################################################################################
# Create the Environment (MDP)
register(
    id='Pendulum-v1',
    entry_point='gym.envs.classic_control:PendulumEnv',
    max_episode_steps=500,
)
env = gym.make('Pendulum-v1')
mdp = MDP(env)

##########################################################################################
# Gather an Off-Policy Dataset

states = mdp.discretize_space(space='state', levels=[20,
                                                     20])  # theta, theta_dot
actions = mdp.discretize_space(space='action', levels=[2])
sampling_params = {
    'sampling_type': 'uniform',
    'states': states,
    'actions': actions,
Example #25
from gym_doom.doom_env import DoomEnv
from gym_doom.doom_basic import DoomBasicEnv
from gym_doom.doom_corridor import DoomCorridorEnv
from gym_doom.doom_defend_center import DoomDefendCenterEnv
from gym_doom.doom_defend_line import DoomDefendLineEnv
from gym_doom.doom_health_gathering import DoomHealthGatheringEnv
from gym_doom.doom_my_way_home import DoomMyWayHomeEnv
from gym_doom.doom_predict_position import DoomPredictPositionEnv
from gym_doom.doom_take_cover import DoomTakeCoverEnv
from gym_doom.doom_deathmatch import DoomDeathmatchEnv

# Doom
# ----------------------------------------

register(
    id='DoomBasic-v0',
    entry_point='gym_doom:DoomBasicEnv',
)

register(
    id='DoomCorridor-v0',
    entry_point='gym_doom:DoomCorridorEnv',
)

register(
    id='DoomDefendCenter-v0',
    entry_point='gym_doom:DoomDefendCenterEnv',
)

register(
    id='DoomDefendLine-v0',
    entry_point='gym_doom:DoomDefendLineEnv',
Example #26
import logging

from gym.envs import register

register(id='airl/ObjPusher-v0',
         entry_point='airl.envs.pusher_env:PusherEnv',
         kwargs={'sparse_reward': False})
register(id='airl/TwoDMaze-v0', entry_point='airl.envs.twod_maze:TwoDMaze')
register(id='airl/PointMazeRight-v0',
         entry_point='airl.envs.point_maze_env:PointMazeEnv',
         kwargs={
             'sparse_reward': False,
             'direction': 1
         })
register(id='airl/PointMazeLeft-v0',
         entry_point='airl.envs.point_maze_env:PointMazeEnv',
         kwargs={
             'sparse_reward': False,
             'direction': 0
         })

# A modified ant which flips over less and learns faster via TRPO
register(id='airl/CustomAnt-v0',
         entry_point='airl.envs.ant_env:CustomAntEnv',
         kwargs={
             'gear': 30,
             'disabled': False
         })
register(id='airl/DisabledAnt-v0',
         entry_point='airl.envs.ant_env:CustomAntEnv',
         kwargs={
Example #27
def test_register_error(env_id):
    with pytest.raises(error.Error, match="Malformed environment ID"):
        register(env_id)
Example #28
        assert state_np.shape == self.observation_space.shape
        position_map = state_np[:, :, 0]
        col_sum = position_map.sum(axis=0)
        row_sum = position_map.sum(axis=1)
        assert col_sum.sum() == 1, f'{col_sum.sum()}'
        self.posX = np.where(col_sum == 1)[0][0]
        self.posY = np.where(row_sum == 1)[0][0]

    # def set_maxsteps(self, num_steps):
    #   self.max_steps = num_steps


for N in range(2, 100):
    l = partial(ChainEnvironment, N)
    register(id=rf"ChainEnv{N}-v1", entry_point=l, max_episode_steps=200)

# if __name__ == "__main__":
#
#     import tensorflow as tf
#     import baselines.common.tf_util as U
#     from learning_and_planning.mcts.create_agent import create_agent
#
#     from mrunner.helpers.client_helper import get_configuration
#     import gin.tf.external_configurables
#
#
#     params = get_configuration(print_diagnostics=True,
#                                with_neptune=False,
#                                inject_parameters_to_gin=True)
#
Example #29
    print("Mean return: ", np.mean(qlearning.episode_return))
    print(
        "Last 100 Episodes window: ",
        np.mean(qlearning.episode_return[episodes_completed -
                                         100:episodes_completed]))
    print("Total episodes: ", np.size(qlearning.episode_return))
    print("Total time steps: ", np.abs(np.sum(qlearning.episode_return)))
    return RewardsInfo(
        np.mean(qlearning.episode_return[episodes_completed -
                                         100:episodes_completed]))


if __name__ == "__main__":
    register(
        id='MountainCar-v3',
        entry_point='gym.envs.classic_control:MountainCarEnv',
        max_episode_steps=10000,
        reward_threshold=-110.0,
    )

    rewards_list = []
    # alphas = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    # alphas = [0.3, 0.4, 0.5, 0.6, 0.7]
    alphas = [0.4]
    epsilons = [0.1]
    parameters_list = [(alpha, epsilon) for alpha in alphas
                       for epsilon in epsilons]
    for reward_info in Parallel(n_jobs=2)(delayed(do_experiment)(parameters)
                                          for parameters in parameters_list):
        rewards_list.append(reward_info)
Example #30
from gym.envs import register
from metaworld.envs.mujoco.env_dict import ALL_V1_ENVIRONMENTS

ALL_ENVS = []

if __name__ != '__main__':
    for task_name in ALL_V1_ENVIRONMENTS:
        ID = f'{task_name.capitalize()}'
        register(id=ID,
                 entry_point='env_wrappers.metaworld.mw_env:MWEnv',
                 kwargs=dict(task_name=task_name,
                             # width=84, height=84, frame_skip=4
                             ),
                 )
        ALL_ENVS.append(ID)

        ID = f'{task_name[:-3].capitalize()}-fixed{task_name[-3:]}'
        register(id=ID,
                 entry_point='env_wrappers.metaworld.mw_env:MWFixedEnv',
                 kwargs=dict(task_name=task_name,
                             # width=84, height=84, frame_skip=4
                             ),
                 )
        ALL_ENVS.append(ID)
else:
    from cmx import doc

    doc @ """
    # Metaworld Environment Wrappers
    
    > This document, including the embedded video, is generated 
Example #31
import gym
import time
import universe
from gym.envs import register
from universe import wrappers

register(
    id='test.SecondsLimitDummyVNCEnv-v0',
    entry_point='universe.envs:DummyVNCEnv',
    max_episode_seconds=0.1,
    tags={
        'vnc': True,
        }
    )

register(
    id='test.StepsLimitDummyVNCEnv-v0',
    entry_point='universe.envs:DummyVNCEnv',
    max_episode_steps=2,
    tags={
        'vnc': True,
        }
    )


def test_steps_limit_restart():
    env = gym.make('test.StepsLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()
Example #32
        self.change_colors = game.change_colors
        self.change_dynamics = game.change_dynamics

        # super(KrazyGridWorldEnv, self).__init__()
        utils.EzPickle.__init__(self)

    def _seed(self, seed=None):
        position_seed, task_seed = seed
        self.env.seed(position_seed, task_seed)

    def _render(self, mode='human', close=False):
        # todo: @bstadie need to take care of other render modes
        return self.env.render()


GRID_WORLDS = {
    "EasyWorld-v0": EASY_GRID_KWARGS,
    "MediumWorld-v0": MEDIUM_GRID_KWARGS,
    "HardWorld-v0": HARD_GRID_KWARGS
}

for env_id, kwargs in GRID_WORLDS.items():
    register(
        env_id,
        # entry_point=lambda: KrazyGridWorldEnv(**kwargs),
        entry_point=
        "custom_vendor.krazy_worlds.krazy_world_envs:KrazyGridWorldEnv",
        kwargs=kwargs,
        max_episode_steps=24,
        reward_threshold=50.0)
Example #33
import gym
import time
import universe
from gym.envs import register
from universe import wrappers

register(
    id='test.SecondsLimitDummyVNCEnv-v0',
    entry_point='universe.envs:DummyVNCEnv',
    tags={
        'vnc': True,
        'wrapper_config.TimeLimit.max_episode_seconds': 0.1
        }
    )

register(
    id='test.StepsLimitDummyVNCEnv-v0',
    entry_point='universe.envs:DummyVNCEnv',
    tags={
        'vnc': True,
        'wrapper_config.TimeLimit.max_episode_steps': 2
        }
    )


def test_steps_limit_restart():
    env = gym.make('test.StepsLimitDummyVNCEnv-v0')
    env = wrappers.TimeLimit(env)
    env.configure(_n=1)
    env.reset()
Example #34
def register_custom_envs():
    global _REGISTERED
    if _REGISTERED:
        return
    _REGISTERED = True

    LOGGER.info("Registering custom gym environments")
    register(id='ObjPusher-v0',
             entry_point='LRMBMRL.envs.pusher_env:PusherEnv',
             kwargs={'sparse_reward': False})
    register(id='TwoDMaze-v0', entry_point='LRMBMRL.envs.twod_maze:TwoDMaze')
    register(id='PointMazeRight-v0',
             entry_point='LRMBMRL.envs.point_maze_env:PointMazeEnv',
             kwargs={
                 'sparse_reward': False,
                 'direction': 1
             })
    register(id='PointMazeLeft-v0',
             entry_point='LRMBMRL.envs.point_maze_env:PointMazeEnv',
             kwargs={
                 'sparse_reward': False,
                 'direction': 0
             })

    # pointmass
    register(id='PointMass-v0', entry_point='LRMBMRL.envs.pointmass:PointMass')

    # A modified ant which flips over less and learns faster via TRPO
    register(id='CustomAnt-v0',
             entry_point='LRMBMRL.envs.ant_env:CustomAntEnv',
             kwargs={
                 'gear': 30,
                 'disabled': False
             })
    register(id='DisabledAnt-v0',
             entry_point='LRMBMRL.envs.ant_env:CustomAntEnv',
             kwargs={
                 'gear': 30,
                 'disabled': True
             })

    register(id='VisualPointMaze-v0',
             entry_point='LRMBMRL.envs.visual_pointmass:VisualPointMazeEnv',
             kwargs={
                 'sparse_reward': False,
                 'direction': 1
             })
Example #35
      x += 37
      return x,y

def wrapped_pong_factory(warm_up_examples=0, action_space_reduction=False,
                         reward_skip_steps=0, big_ball=False):
  env = gym.make('PongDeterministic-v4')
  env = env.env  # remove timelime wrapper
  env = PongWrapper(env, warm_up_examples=warm_up_examples,
                    action_space_reduction=action_space_reduction,
                    reward_skip_steps=reward_skip_steps,
                    big_ball=big_ball)
  return env


register(id="T2TPongWarmUp20RewSkip1000Steps-v1",
         entry_point=lambda: wrapped_pong_factory(warm_up_examples=20, reward_skip_steps=15),
         max_episode_steps=200)
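Since the entry_point here is a callable, gym.make invokes the factory directly; a hypothetical usage:

env = gym.make("T2TPongWarmUp20RewSkip1000Steps-v1")  # builds the wrapped PongDeterministic-v4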


def decode_image_from_png(image_str):
  from PIL import Image
  import io
  im = Image.open(io.BytesIO(image_str))
  return np.array(im)

def encode_image_to_png(image):
  from PIL import Image
  import io
  buffer=io.BytesIO()
  im = Image.fromarray(image)
  im.save(buffer, format="png")
Example #36
            self.data.qpos[1:],
            # self.model.data.qvel.flat,
            self.data.qvel,
        ])

    def set_goal_direction(self, goal_direction=None):
        self.controls.sample(goal_direction=goal_direction)

    def get_goal_direction(self):  # only for debugging
        return self.controls.goal_direction

    def reset_model(self):
        qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        self.viewer.cam.distance = self.model.stat.extent * 0.5


register(
    id='HalfCheetahGoalDir-v0',
    # todo: use module.sub_module:ClassName syntax to work with rcall and cloudpickle.
    # entry_point=lambda: HalfCheetahGoalVelEnv(),
    entry_point="e_maml_tf.custom_vendor.half_cheetah_goal_direction:HalfCheetahGoalDirEnv",
    kwargs={},
    max_episode_steps=200,
    reward_threshold=4800.0,
)