def test_default_lookups():
    """Resolving an unversioned registration through ``spec``."""
    register("test/Test3")
    # Asking explicitly for -v0 of an env registered without a version
    # is reported as deprecated.
    with pytest.raises(error.DeprecatedEnv):
        spec("test/Test3-v0")
    # Looking up the bare name resolves the default registration.
    spec("test/Test3")
def register_custom_envs():
    """Register every environment described in ``custom_envs`` with gym.

    Each entry maps an env id to a dict holding the entry point, the
    episode step limit, constructor kwargs and an optional reward threshold.
    """
    for env_id, config in custom_envs.items():
        arg_dict = dict(
            id=env_id,
            entry_point=config['path'],
            max_episode_steps=config['max_episode_steps'],
            kwargs=config['kwargs'],
        )
        # reward_threshold is optional; forward it only when present.
        # (Idiom fix: membership test on the dict itself, not ``.keys()``.)
        if 'reward_threshold' in config:
            arg_dict['reward_threshold'] = config['reward_threshold']
        register(**arg_dict)
def register_custom_envs():
    """Register all ``custom_envs`` entries with gym.

    Values are dicts with keys ``path``, ``max_episode_steps``, ``kwargs``
    and, optionally, ``reward_threshold``.
    """
    for key, value in custom_envs.items():
        arg_dict = dict(id=key,
                        entry_point=value['path'],
                        max_episode_steps=value['max_episode_steps'],
                        kwargs=value['kwargs'])
        # Idiom fix: ``in value`` instead of ``in value.keys()``.
        if 'reward_threshold' in value:
            arg_dict['reward_threshold'] = value['reward_threshold']
        register(**arg_dict)
def register_envs(dependency_labels_file):
    """Register the dependency-parsing transition environments.

    Args:
        dependency_labels_file: label inventory path forwarded to each env.
    """
    register(
        id='ArcStandardTransitionEnv-v0',
        entry_point='deeprl.dp_envs:ArcStandardTransitionEnv',
        kwargs={'dependency_labels_file': dependency_labels_file})
    # NOTE(review): this entry point targets deeprl_hw1.queue_envs:QueueEnv
    # rather than an arc-eager parser class — looks like a copy/paste
    # leftover; confirm before relying on this id.
    register(
        id='ArcEagerTransitionEnv-v0',
        entry_point='deeprl_hw1.queue_envs:QueueEnv',
        kwargs={'dependency_labels_file': dependency_labels_file})
def test_register(env_id, namespace, name, version):
    """``register`` indexes the env under its fully qualified name."""
    register(env_id)
    assert gym.envs.spec(env_id).id == env_id
    # Rebuild the expected registry key from its components.
    expected = name
    if namespace:
        expected = f"{namespace}/{expected}"
    if version is not None:
        expected = f"{expected}-v{version}"
    assert expected in gym.envs.registry.keys()
    # Leave the global registry clean for the next test.
    del gym.envs.registry[env_id]
def test_versioned_lookups():
    """``spec`` distinguishes unknown, outdated and current versions."""
    register("test/Test2-v5")
    # A version above the newest registration does not exist at all.
    with pytest.raises(error.VersionNotFound):
        spec("test/Test2-v9")
    # A version below the newest registration is considered deprecated.
    with pytest.raises(error.DeprecatedEnv):
        spec("test/Test2-v4")
    # The registered version itself resolves fine.
    assert spec("test/Test2-v5")
def test_namespace():
    """The ``namespace`` context manager prefixes ids registered inside it."""
    with registration.namespace("MyDefaultNamespace"):
        register("MyDefaultEnvironment-v0")
    # Registered outside the context manager, so no prefix is applied.
    register("MyDefaultEnvironment-v1")
    assert "MyDefaultNamespace/MyDefaultEnvironment-v0" in registry
    assert "MyDefaultEnvironment-v1" in registry
    # Clean up so other tests see a pristine registry.
    del registry["MyDefaultNamespace/MyDefaultEnvironment-v0"]
    del registry["MyDefaultEnvironment-v1"]
def register_custom_envs():
    """Register the PointMaze environment family, exactly once per process."""
    global _REGISTERED
    if _REGISTERED:
        return
    _REGISTERED = True
    LOGGER.info("Registering custom gym environments")
    entry_point = 'inverse_rl.envs.point_maze_env:PointMazeEnv'
    # (env id, direction flag, discrete action space?) — the four variants
    # differed only in these two kwargs, so register them in one loop.
    variants = [
        ('PointMazeRight-v0', 1, True),
        ('PointMazeLeft-v0', 0, True),
        ('PointMazeRightCont-v0', 1, False),
        ('PointMazeLeftCont-v0', 0, False),
    ]
    for env_id, direction, discrete in variants:
        register(id=env_id,
                 entry_point=entry_point,
                 kwargs={
                     'sparse_reward': False,
                     'direction': direction,
                     'discrete': discrete,
                 })
def make_env_Rubik(**kwargs):
    """Create a Rubik env, registering it on first use.

    The env id encodes ``kwargs`` (with separator characters stripped) so
    different configurations map to different registrations.
    """
    # Renamed from ``id`` to avoid shadowing the builtin.
    env_id = ("Rubik-" + str(kwargs) + "-v0").translate(
        str.maketrans('', '', " {}'<>()_"))
    env_id = env_id.replace(',', '-')
    try:
        register(id=env_id, entry_point='gym_rubik.envs:RubikEnv', kwargs=kwargs)
        print("Registered environment with id = " + env_id)
    except Exception:
        # Bug fix: the bare ``except:`` also swallowed SystemExit and
        # KeyboardInterrupt. A failure here means the id is already
        # registered; reuse that registration.
        print("Environment with id = " + env_id +
              " already registered. Continuing with that environment.")
    env = gym.make(env_id)
    return env
def register_from_string(env_id, class_=None, **kwargs):
    """Register a grid-world env, selecting the class from the arguments.

    ``reward_threshold`` and ``max_episode_steps`` are popped out of
    ``kwargs``; the remainder is forwarded to the env constructor.
    """
    # NOTE(review): any ``class_`` value other than the literal string
    # "SubtasksGridWorld" is overwritten below — confirm that is intended.
    if class_ == "SubtasksGridWorld":
        class_ = SubtasksGridWorld
    elif "random" in kwargs:
        class_ = RandomGridWorld
    else:
        class_ = GridWorld
    register(
        id=env_id,
        entry_point=f"{class_.__module__}:{class_.__name__}",
        reward_threshold=kwargs.pop("reward_threshold", None),
        max_episode_steps=kwargs.pop("max_episode_steps", None),
        nondeterministic=False,
        kwargs=kwargs,
    )
def register_env(env_id):
    """Register one of the two known custom envs; raise on anything else.

    Returns:
        True once the environment has been registered.
    Raises:
        ValueError: if ``env_id`` is not a supported environment.
    """
    # Dispatch table: env id -> (entry point, reward threshold).
    specs = {
        'Maze2D-v0': ('env:maze2d.Maze2D', -110.0),
        '3linkarm-v0': ('threelinkarm:ThreelinkArm', -100.0),
    }
    if env_id not in specs:
        raise ValueError('Cannot find environment "{0}"\n'.format(env_id))
    entry_point, reward_threshold = specs[env_id]
    envs.register(id=env_id,
                  entry_point=entry_point,
                  kwargs={},
                  max_episode_steps=200,
                  reward_threshold=reward_threshold)
    return True
def make_env_BitFlipper(n=10, space_seed=0):
    """Create a seeded BitFlipper env, registering its id on first use.

    Args:
        n: number of bits in the flipping problem.
        space_seed: seed encoded into the env id and passed to the env.
    """
    env_id = "BitFlipper" + str(n) + ":" + str(space_seed) + "-v0"
    try:
        register(id=env_id,
                 entry_point='gym_BitFlipper.envs:BitFlipperEnv',
                 kwargs={"space_seed": space_seed, "n": n})
    except Exception:
        # Bug fix: the bare ``except:`` also caught SystemExit and
        # KeyboardInterrupt. A failure here means the id already exists.
        print("Environment with id = " + env_id +
              " already registered. Continuing with that environment.")
    env = gym.make(env_id)
    env.seed(0)
    return env
def test_default_time_limit():
    """TimeLimit falls back to the default seconds cap when the env sets none."""
    # Register an env spec that carries no time-limit metadata.
    register(
        id='test.NoLimitDummyVNCEnv-v0',
        entry_point='universe.envs:DummyVNCEnv',
        tags={'vnc': True},
    )
    env = gym.make('test.NoLimitDummyVNCEnv-v0')
    env.configure(_n=1)
    env = wrappers.TimeLimit(env)
    env.reset()
    assert env._max_episode_seconds == wrappers.time_limit.DEFAULT_MAX_EPISODE_SECONDS
    assert env._max_episode_steps is None
def test_default_time_limit():
    """An env without its own limit gets the default max-seconds cap."""
    # Register an env spec that carries no time-limit metadata.
    register(
        id='test.NoLimitDummyVNCEnv-v0',
        entry_point='universe.envs:DummyVNCEnv',
        tags={'vnc': True},
    )
    env = gym.make('test.NoLimitDummyVNCEnv-v0')
    # Wrap first, then configure the wrapped env.
    env = wrappers.TimeLimit(env)
    env.configure(_n=1)
    env.reset()
    assert env._max_episode_seconds == wrappers.time_limit.DEFAULT_MAX_EPISODE_SECONDS
    assert env._max_episode_steps is None
def register_env(env_id):
    """Register one of the supported custom environments with gym.

    Returns:
        True once the environment has been registered.
    Raises:
        ValueError: if ``env_id`` is not supported.
    """
    # env id -> (entry point, reward threshold); both envs cap at 200 steps.
    specs = {
        'Maze2D-v0': ('env:maze2d.Maze2D', -110.0),
        'PuddleWorld-v0': ('env:puddleworld.PuddleWorld', -100.0),
    }
    if env_id not in specs:
        raise ValueError('Cannot find environment "{0}"\n'.format(env_id))
    entry_point, reward_threshold = specs[env_id]
    envs.register(
        id=env_id,
        entry_point=entry_point,
        kwargs={},
        max_episode_steps=200,
        reward_threshold=reward_threshold)
    return True
def test_cache(self):
    """Stepping three index actions should populate the env's cache."""
    np.random.seed(123)
    # Bug fix: the plain string "..\query_pull_1000v3.pkl" relied on the
    # invalid escape sequence ``\q`` (a SyntaxWarning on modern Python,
    # slated to become an error). A raw string keeps the bytes identical.
    with open(r"..\query_pull_1000v3.pkl", 'rb') as f:
        query_pull = pickle.load(f)
    register(
        id='DatabaseIndexesEnv-v0',
        entry_point='dbenv:DatabaseIndexesEnv',
        kwargs={'n': const.COLUMNS_AMOUNT,
                'table_name': "test_table",
                'query_pull': query_pull,
                'batch_size': 2,
                'connector': PostgresConnector(),
                'k': 3,
                'max_episodes': 1}
    )
    env = gym.make('DatabaseIndexesEnv-v0')
    env.step(0)
    env.step(1)
    env.step(2)
    print(env.cache)
def env_creator():
    """Build the Sokoban env with curriculum/history (and optional video)."""
    # This is awkward, but gym adds some magic required for wrappers, so
    # register a factory and go through gym.make.
    creator = get_env_creator(
        env_callable_name="gym_sokoban.envs:SokobanEnv", seed=seed)
    register("BareEnv-v0", entry_point=creator)
    env = gym.make("BareEnv-v0")
    curriculum_setter = curriculum_setter_fn(env)
    summarizer = EpisodeHistorySummarizer(
        summary_helper, curriculum_setter, freq=20)
    env = EpisodeHistoryCallbackWrapper(env, [summarizer])
    if video_directory:
        env = VideoRecorderWrapper(
            env,
            directory=video_directory,
            record_video_trigger=record_video_trigger,
            video_length=2000000,
            summary_helper=summary_helper)
    return env
def train_model():
    """Train a DQN agent on the database-index env and persist its weights."""
    np.random.seed(123)
    # Only the first five queries of the pickled pull are used.
    with open("query_pull_1000v2.pkl", 'rb') as f:
        query_pull = pickle.load(f)[0:5]
    register(id=ENV_NAME,
             entry_point='dbenv:DatabaseIndexesEnv',
             kwargs={'n': COLUMNS_AMOUNT,
                     'table_name': table_name,
                     'query_pull': query_pull,
                     'batch_size': BATCH_SIZE,
                     'connector': PostgresConnector(),
                     'k': 3,
                     'max_episodes': episodes})
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    env.seed(123)
    # Next, we build a very simple model.
    model = build_model()
    print(model.summary())
    # Configure and compile our agent; any built-in Keras optimizer works.
    dqn = initialize_agent(model)
    # Train. Visualization is off because it slows training down a lot;
    # training can always be aborted safely with Ctrl + C.
    dqn.fit(env, nb_steps=episodes, visualize=False, verbose=2)
    # After training is done, save the final weights.
    dqn.save_weights('dqn_specific_{}.h5f'.format(ENV_NAME), overwrite=True)
    # Finally, evaluate the agent for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=False)
def _register(self, user_env):
    """Register ``user_env`` with gym, replacing any same-id registration.

    Ids are tracked case-insensitively; an existing registration is
    deregistered and its bookkeeping entry dropped before re-registering.
    Returns True on success.
    """
    env_id_lower = user_env['id'].lower()
    if env_id_lower in self.env_ids:
        logger.info(
            'Deregistering the environment %s, because it is already registered.',
            user_env['id'])
        deregister(user_env['id'])
        # Drop the stale bookkeeping entry (case-insensitive match).
        for i, env in enumerate(self.user_envs):
            if env['id'].lower() == env_id_lower:
                self.user_envs.pop(i)
                break
    # Forward only the keys gym's register() understands.
    register_params = [
        'id', 'entry_point', 'timestep_limit', 'trials', 'reward_threshold',
        'local_only', 'kwargs', 'nondeterministic'
    ]
    register_kwargs = {k: user_env[k] for k in register_params if k in user_env}
    register(**register_kwargs)
    self.user_envs.append(user_env)
    self.env_ids.add(env_id_lower)
    return True
def test_register_versioned_unversioned():
    """Mixing versioned and unversioned ids for one env name is rejected."""
    versioned_env = "Test/MyEnv-v0"
    unversioned_env = "Test/MyEnv"

    # Versioned first: the unversioned registration must fail.
    register(versioned_env)
    assert gym.envs.spec(versioned_env).id == versioned_env
    with pytest.raises(error.RegistrationError):
        register(unversioned_env)
    del gym.envs.registry[versioned_env]

    # Unversioned first: the versioned registration must fail.
    register(unversioned_env)
    assert gym.envs.spec(unversioned_env).id == unversioned_env
    with pytest.raises(error.RegistrationError):
        register(versioned_env)
    del gym.envs.registry[unversioned_env]
def test_missing_lookup():
    """``spec`` errors distinguish deprecated versions from unknown envs."""
    register(id="Test1-v0", entry_point=None)
    register(id="Test1-v15", entry_point=None)
    register(id="Test1-v9", entry_point=None)
    register(id="Other1-v100", entry_point=None)
    # A version older than the newest registration is deprecated.
    with pytest.raises(error.DeprecatedEnv):
        spec("Test1-v1")
    # Consistency fix: the original used try/except/else-assert-False for
    # the next two cases; pytest.raises expresses the same expectation the
    # way the rest of this file does.
    # A version newer than anything registered is simply unregistered.
    with pytest.raises(error.UnregisteredEnv):
        spec("Test1-v1000")
    # A name that was never registered is unregistered too.
    with pytest.raises(error.UnregisteredEnv):
        spec("Unknown1-v1")
def test_register_versioned_unversioned():
    """Registering versioned and unversioned flavours of one id conflicts."""
    versioned_env = "Test/MyEnv-v0"
    unversioned_env = "Test/MyEnv"

    # Versioned first: the unversioned registration is an error.
    envs.register(versioned_env)
    assert gym.envs.spec(versioned_env).id == versioned_env
    with pytest.raises(error.RegistrationError):
        envs.register(unversioned_env)
    del gym.envs.registry.env_specs[versioned_env]

    # Unversioned first: registration warns, then the versioned one errors.
    with pytest.warns(UserWarning):
        envs.register(unversioned_env)
    assert gym.envs.spec(unversioned_env).id == unversioned_env
    with pytest.raises(error.RegistrationError):
        envs.register(versioned_env)

    # Clean everything (mirrors the original cleanup list).
    for env_id in (versioned_env, unversioned_env):
        del gym.envs.registry.env_specs[env_id]
# NOTE(review): collapsed extraction of a class method plus module-level code;
# it embeds a mid-line `#` comment and the `__main__` tail may be truncated,
# so the text is left byte-identical rather than reflowed.
def feature_ext(self, state, action=None): if action is None: out = np.zeros((self.state_size)) out[state] = 1 else: out = np.zeros((self.state_size * self.action_size)) out[action * self.state_size + state] = 1 return out register( id='FrozenLake-simple-v0', entry_point='gym.envs.toy_text:FrozenLakeEnv', kwargs={ 'map_name': '4x4', "is_slippery": False }, max_episode_steps=100, reward_threshold=0.78, # optimum = .8196 ) if __name__ == '__main__': envs = [gym.make("FrozenLake-v0") for _ in range(1)] env = envs[0] state_size = env.observation_space.n action_size = env.action_space.n sess = tf.Session() agent = FrozenLakeAgentLSPI(state_size, action_size, 0.9, envs) init = tf.global_variables_initializer() sess.run(init)
# NOTE(review): collapsed script fragment (imports + Pendulum-v1 registration
# + MDP setup); the trailing `sampling_params` dict is truncated mid-literal,
# so the text is left byte-identical rather than reflowed.
import torch.nn as nn from src.mdp.mdp import MDP from src.policy.policy import Policy, policy_gmm from src.configs.configs import TORCH_DTYPE, NP_DTYPE from src.nopg.nopg import NOPG # Use the GPU if available, or if the memory is insufficient use only the CPU # DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu') DEVICE = torch.device('cpu') ########################################################################################## # Create the Environment (MDP) register( id='Pendulum-v1', entry_point='gym.envs.classic_control:PendulumEnv', max_episode_steps=500, ) env = gym.make('Pendulum-v1') mdp = MDP(env) ########################################################################################## # Gather an Off-Policy Dataset states = mdp.discretize_space(space='state', levels=[20, 20]) # theta, theta_dot actions = mdp.discretize_space(space='action', levels=[2]) sampling_params = { 'sampling_type': 'uniform', 'states': states, 'actions': actions,
# NOTE(review): collapsed Doom registration module; the final register() call
# is truncated mid-arguments, so the text is left byte-identical.
from gym_doom.doom_env import DoomEnv from gym_doom.doom_basic import DoomBasicEnv from gym_doom.doom_corridor import DoomCorridorEnv from gym_doom.doom_defend_center import DoomDefendCenterEnv from gym_doom.doom_defend_line import DoomDefendLineEnv from gym_doom.doom_health_gathering import DoomHealthGatheringEnv from gym_doom.doom_my_way_home import DoomMyWayHomeEnv from gym_doom.doom_predict_position import DoomPredictPositionEnv from gym_doom.doom_take_cover import DoomTakeCoverEnv from gym_doom.doom_deathmatch import DoomDeathmatchEnv # Doom # ---------------------------------------- register( id='DoomBasic-v0', entry_point='gym_doom:DoomBasicEnv', ) register( id='DoomCorridor-v0', entry_point='gym_doom:DoomCorridorEnv', ) register( id='DoomDefendCenter-v0', entry_point='gym_doom:DoomDefendCenterEnv', ) register( id='DoomDefendLine-v0', entry_point='gym_doom:DoomDefendLineEnv',
# NOTE(review): collapsed airl registration module; the last register() call
# is truncated inside its kwargs dict, so the text is left byte-identical.
import logging from gym.envs import register register(id='airl/ObjPusher-v0', entry_point='airl.envs.pusher_env:PusherEnv', kwargs={'sparse_reward': False}) register(id='airl/TwoDMaze-v0', entry_point='airl.envs.twod_maze:TwoDMaze') register(id='airl/PointMazeRight-v0', entry_point='airl.envs.point_maze_env:PointMazeEnv', kwargs={ 'sparse_reward': False, 'direction': 1 }) register(id='airl/PointMazeLeft-v0', entry_point='airl.envs.point_maze_env:PointMazeEnv', kwargs={ 'sparse_reward': False, 'direction': 0 }) # A modified ant which flips over less and learns faster via TRPO register(id='airl/CustomAnt-v0', entry_point='airl.envs.ant_env:CustomAntEnv', kwargs={ 'gear': 30, 'disabled': False }) register(id='airl/DisabledAnt-v0', entry_point='airl.envs.ant_env:CustomAntEnv', kwargs={
def test_register_error(env_id):
    """Malformed environment ids are rejected with a descriptive error."""
    with pytest.raises(error.Error, match="Malformed environment ID"):
        register(env_id)
# NOTE(review): collapsed fragment starting mid-method (`del quick` belongs to
# an enclosing method whose `def` is not visible) and containing mid-line `#`
# comments; left byte-identical rather than reflowed.
del quick assert state_np.shape == self.observation_space.shape position_map = state_np[:, :, 0] col_sum = position_map.sum(axis=0) row_sum = position_map.sum(axis=1) assert col_sum.sum() == 1, f'{col_sum.sum()}' self.posX = np.where(col_sum == 1)[0][0] self.posY = np.where(row_sum == 1)[0][0] # def set_maxsteps(self, num_steps): # self.max_steps = num_steps for N in range(2, 100): l = partial(ChainEnvironment, N) register(id=rf"ChainEnv{N}-v1", entry_point=l, max_episode_steps=200) # if __name__ == "__main__": # # import tensorflow as tf # import baselines.common.tf_util as U # from learning_and_planning.mcts.create_agent import create_agent # # from mrunner.helpers.client_helper import get_configuration # import gin.tf.external_configurables # # # params = get_configuration(print_diagnostics=True, # with_neptune=False, # inject_parameters_to_gin=True) #
# NOTE(review): collapsed fragment starting mid-function (the prints/return
# belong to a function whose `def` is not visible) plus a `__main__` driver
# with mid-line `#` comments; left byte-identical rather than reflowed.
print("Mean return: ", np.mean(qlearning.episode_return)) print( "Last 100 Episodes window: ", np.mean(qlearning.episode_return[episodes_completed - 100:episodes_completed])) print("Total episodes: ", np.size(qlearning.episode_return)) print("Total time steps: ", np.abs(np.sum(qlearning.episode_return))) return RewardsInfo( np.mean(qlearning.episode_return[episodes_completed - 100:episodes_completed])) if __name__ == "__main__": register( id='MountainCar-v3', entry_point='gym.envs.classic_control:MountainCarEnv', max_episode_steps=10000, reward_threshold=-110.0, ) rewards_list = [] # alphas = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] # alphas = [0.3, 0.4, 0.5, 0.6, 0.7] alphas = [0.4] epsilons = [0.1] parameters_list = [(alpha, epsilon) for alpha in alphas for epsilon in epsilons] for reward_info in Parallel(n_jobs=2)(delayed(do_experiment)(parameters) for parameters in parameters_list): rewards_list.append(reward_info)
# NOTE(review): collapsed metaworld registration module; it ends inside an
# unterminated documentation string in the `else` branch, so the text is left
# byte-identical rather than reflowed.
from gym.envs import register from metaworld.envs.mujoco.env_dict import ALL_V1_ENVIRONMENTS ALL_ENVS = [] if __name__ != '__main__': for task_name in ALL_V1_ENVIRONMENTS: ID = f'{task_name.capitalize()}' register(id=ID, entry_point='env_wrappers.metaworld.mw_env:MWEnv', kwargs=dict(task_name=task_name, # width=84, height=84, frame_skip=4 ), ) ALL_ENVS.append(ID) ID = f'{task_name[:-3].capitalize()}-fixed{task_name[-3:]}' register(id=ID, entry_point='env_wrappers.metaworld.mw_env:MWFixedEnv', kwargs=dict(task_name=task_name, # width=84, height=84, frame_skip=4 ), ) ALL_ENVS.append(ID) else: from cmx import doc doc @ """ # Metaworld Environment Wrappers > This document, including the embedded video, is generated
# NOTE(review): collapsed universe test module; `test_steps_limit_restart`
# appears truncated after `env.reset()`, so the text is left byte-identical.
import gym import time import universe from gym.envs import register from universe import wrappers register( id='test.SecondsLimitDummyVNCEnv-v0', entry_point='universe.envs:DummyVNCEnv', max_episode_seconds=0.1, tags={ 'vnc': True, } ) register( id='test.StepsLimitDummyVNCEnv-v0', entry_point='universe.envs:DummyVNCEnv', max_episode_steps=2, tags={ 'vnc': True, } ) def test_steps_limit_restart(): env = gym.make('test.StepsLimitDummyVNCEnv-v0') env.configure(_n=1) env = wrappers.TimeLimit(env) env.reset()
# NOTE(review): collapsed fragment starting mid-class (the leading `self.`
# assignments belong to a method whose `def` is not visible) followed by
# module-level GRID_WORLDS registration; left byte-identical.
self.change_colors = game.change_colors self.change_dynamics = game.change_dynamics # super(KrazyGridWorldEnv, self).__init__() utils.EzPickle.__init__(self) def _seed(self, seed=None): position_seed, task_seed = seed self.env.seed(position_seed, task_seed) def _render(self, mode='human', close=False): # todo: @bstadie need to take care of other render modes return self.env.render() GRID_WORLDS = { "EasyWorld-v0": EASY_GRID_KWARGS, "MediumWorld-v0": MEDIUM_GRID_KWARGS, "HardWorld-v0": HARD_GRID_KWARGS } for env_id, kwargs in GRID_WORLDS.items(): register( env_id, # entry_point=lambda: KrazyGridWorldEnv(**kwargs), entry_point= "custom_vendor.krazy_worlds.krazy_world_envs:KrazyGridWorldEnv", kwargs=kwargs, max_episode_steps=24, reward_threshold=50.0)
# NOTE(review): collapsed universe test module (tag-based time-limit variant);
# `test_steps_limit_restart` appears truncated after `env.reset()`, so the
# text is left byte-identical.
import gym import time import universe from gym.envs import register from universe import wrappers register( id='test.SecondsLimitDummyVNCEnv-v0', entry_point='universe.envs:DummyVNCEnv', tags={ 'vnc': True, 'wrapper_config.TimeLimit.max_episode_seconds': 0.1 } ) register( id='test.StepsLimitDummyVNCEnv-v0', entry_point='universe.envs:DummyVNCEnv', tags={ 'vnc': True, 'wrapper_config.TimeLimit.max_episode_steps': 2 } ) def test_steps_limit_restart(): env = gym.make('test.StepsLimitDummyVNCEnv-v0') env = wrappers.TimeLimit(env) env.configure(_n=1) env.reset()
def register_custom_envs():
    """Register the LRMBMRL environment suite with gym (idempotent)."""
    global _REGISTERED
    if _REGISTERED:
        return
    _REGISTERED = True
    LOGGER.info("Registering custom gym environments")
    register(id='ObjPusher-v0',
             entry_point='LRMBMRL.envs.pusher_env:PusherEnv',
             kwargs={'sparse_reward': False})
    register(id='TwoDMaze-v0',
             entry_point='LRMBMRL.envs.twod_maze:TwoDMaze')
    register(id='PointMazeRight-v0',
             entry_point='LRMBMRL.envs.point_maze_env:PointMazeEnv',
             kwargs={'sparse_reward': False, 'direction': 1})
    register(id='PointMazeLeft-v0',
             entry_point='LRMBMRL.envs.point_maze_env:PointMazeEnv',
             kwargs={'sparse_reward': False, 'direction': 0})
    # pointmass
    register(id='PointMass-v0',
             entry_point='LRMBMRL.envs.pointmass:PointMass')
    # A modified ant which flips over less and learns faster via TRPO
    register(id='CustomAnt-v0',
             entry_point='LRMBMRL.envs.ant_env:CustomAntEnv',
             kwargs={'gear': 30, 'disabled': False})
    register(id='DisabledAnt-v0',
             entry_point='LRMBMRL.envs.ant_env:CustomAntEnv',
             kwargs={'gear': 30, 'disabled': True})
    register(id='VisualPointMaze-v0',
             entry_point='LRMBMRL.envs.visual_pointmass:VisualPointMazeEnv',
             kwargs={'sparse_reward': False, 'direction': 1})
# NOTE(review): collapsed fragment starting mid-function (`x += 37 return x,y`
# belongs to a function whose `def` is not visible); `encode_image_to_png`
# also looks truncated at the end, so the text is left byte-identical.
x += 37 return x,y def wrapped_pong_factory(warm_up_examples=0, action_space_reduction=False, reward_skip_steps=0, big_ball=False): env = gym.make('PongDeterministic-v4') env = env.env # remove timelime wrapper env = PongWrapper(env, warm_up_examples=warm_up_examples, action_space_reduction=action_space_reduction, reward_skip_steps=reward_skip_steps, big_ball=big_ball) return env register(id="T2TPongWarmUp20RewSkip1000Steps-v1", entry_point=lambda: wrapped_pong_factory(warm_up_examples=20, reward_skip_steps=15), max_episode_steps=200) def decode_image_from_png(image_str): from PIL import Image import io im = Image.open(io.BytesIO(image_str)) return np.array(im) def encode_image_to_png(image): from PIL import Image import io buffer=io.BytesIO() im = Image.fromarray(image) im.save(buffer, format="png")
# NOTE(review): collapsed fragment starting mid-method (the leading
# `self.data.qpos[1:]` array elements belong to a method whose `def` is not
# visible), followed by more methods and a module-level register(); left
# byte-identical rather than reflowed.
self.data.qpos[1:], # self.model.data.qvel.flat, self.data.qvel, ]) def set_goal_direction(self, goal_direction=None): self.controls.sample(goal_direction=goal_direction) def get_goal_direction(self): # only for debugging return self.controls.goal_direction def reset_model(self): qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq) qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 self.set_state(qpos, qvel) return self._get_obs() def viewer_setup(self): self.viewer.cam.distance = self.model.stat.extent * 0.5 register( id='HalfCheetahGoalDir-v0', # todo: use module.sub_module:ClassName syntax to work with rcall and cloudpickle. # entry_point=lambda: HalfCheetahGoalVelEnv(), entry_point="e_maml_tf.custom_vendor.half_cheetah_goal_direction:HalfCheetahGoalDirEnv", kwargs={}, max_episode_steps=200, reward_threshold=4800.0, )