def make_env(with_monitor=False, folder_name='results'):
    """Build a dense-reward FetchReach-v1 env with a flat observation vector.

    The dict observation is filtered down to 'observation' + 'desired_goal'
    and flattened into a single array.

    :param with_monitor: when True, wrap with gym.wrappers.Monitor.
    :param folder_name: output directory for the Monitor recordings.
    :return: the wrapped gym environment.
    """
    base = gym.make("FetchReach-v1")
    # switch from the default sparse reward to the dense variant
    base.env.reward_type = 'dense'
    flat = FlattenObservation(
        FilterObservation(base, ['observation', 'desired_goal']))
    if not with_monitor:
        return flat
    return gym.wrappers.Monitor(flat, folder_name, force=True)
def __init__(self):
    """Set up the FetchReach policy-search problem used by LA-MCTS.

    The search space is a flat vector of 52 policy weights, each bounded
    to [-1, 1]; a candidate is reshaped to (4, 13) when evaluated.
    """
    # Observation normalization statistics (identity by default).
    self.mean = 0
    self.std = 1

    # Search-space definition: a 52-dim box in [-1, 1].
    self.dims = 52
    self.lb = np.full(self.dims, -1.0)
    self.ub = np.full(self.dims, 1.0)
    self.counter = 0

    # Evaluation environment: dict obs filtered to goal-relevant keys,
    # then flattened to a vector.
    self.env = FlattenObservation(
        FilterObservation(gym.make('FetchReach-v1'),
                          ['observation', 'desired_goal']))
    self.num_rollouts = 3
    self.render = False
    self.policy_shape = (4, 13)

    # tunable hyper-parameters in LA-MCTS
    self.Cp = 10
    self.leaf_size = 100
    self.kernel_type = "linear"
    self.gamma_type = "auto"
    self.ninits = 30

    print("===========initialization===========")
    print("mean:", self.mean)
    print("std:", self.std)
    print("dims:", self.dims)
    print("policy:", self.policy_shape)
def make_robotics_env(env_id, seed, rank=0, allow_early_resets=True):
    """Create a wrapped, monitored gym.Env for MuJoCo robotics tasks.

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :param rank: (int) the rank of the environment (for logging)
    :param allow_early_resets: (bool) allows early reset of the environment
    :return: (Gym Environment) The robotic environment
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    keys = ['observation', 'desired_goal']
    # TODO: remove try-except once most users are running modern Gym
    try:
        # modern Gym (>=0.15.4) ships filter + flatten wrappers
        from gym.wrappers import FilterObservation, FlattenObservation
        env = FlattenObservation(FilterObservation(env, keys))
    except ImportError:
        # fallback for older gym (<=0.15.3)
        from gym.wrappers import FlattenDictWrapper  # pytype:disable=import-error
        env = FlattenDictWrapper(env, keys)
    monitor_path = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, monitor_path,
                  info_keywords=('is_success', ),
                  allow_early_resets=allow_early_resets)
    env.seed(seed)
    return env
def _wrap_test(self, env: gym.Env):
    """Apply the evaluation-time wrapper stack to a raw environment.

    Uses the deterministic 'grid' reset mode and the eval time limit from
    the env config; observations are kept structured (not flattened).
    """
    wrapped = FilterObservation(env, filter_keys=['lidar'])
    wrapped = Flatten(wrapped, flatten_obs=False, flatten_actions=True)
    wrapped = NormalizeObservations(wrapped)
    wrapped = FixedResetMode(wrapped, mode='grid')
    wrapped = TimeLimit(wrapped,
                        max_episode_steps=self._env_config.eval_time_limit)
    return ActionRepeat(wrapped, n=self._env_config.action_repeat)
def _wrap_training(self, env: gym.Env):
    """Apply the training-time wrapper stack to a raw environment.

    Uses random resets and the training time limit; the final GymWrapper +
    SinglePrecisionWrapper adapt the gym env to the dm_env interface.
    """
    wrapped = FilterObservation(env, filter_keys=['lidar'])
    wrapped = Flatten(wrapped, flatten_obs=True, flatten_actions=True)
    wrapped = NormalizeObservations(wrapped)
    wrapped = FixedResetMode(wrapped, mode='random')
    wrapped = TimeLimit(wrapped,
                        max_episode_steps=self._env_config.training_time_limit)
    wrapped = ActionRepeat(wrapped, n=self._env_config.action_repeat)
    adapted = GymWrapper(environment=wrapped)
    return wrappers.SinglePrecisionWrapper(adapted)
def test_nested_dicts_size(self, observation_space, flat_shape):
    """Flattening a filtered Dict space must yield the expected flat shape."""
    env = FakeEnvironment(observation_space=observation_space)
    # Make sure we are testing the right environment for the test.
    space = env.observation_space
    assert isinstance(space, Dict)
    flat_env = FlattenObservation(FilterObservation(env, env.obs_keys))
    flat_space = flat_env.observation_space
    assert flat_space.shape == flat_shape
    assert flat_space.dtype == np.float32
def make_robotics_env(env_id, seed, rank=0):
    """Create a wrapped, monitored gym.Env for MuJoCo robotics tasks.

    The dict observation is filtered to 'observation' + 'desired_goal' and
    flattened; a Monitor records 'is_success' from the step info.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = FlattenObservation(
        FilterObservation(env, ['observation', 'desired_goal']))
    env = Monitor(
        env,
        logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
        info_keywords=('is_success',),
    )
    env.seed(seed)
    return env
def _wrap_test(self, env: gym.Env):
    """Apply the evaluation wrapper stack and adapt to the dm_env interface.

    The underlying gym-level env (after ActionRepeat) is kept reachable via
    the ``gym_env`` attribute on the returned wrapper.
    """
    wrapped = FilterObservation(env, filter_keys=['lidar'])
    wrapped = Flatten(wrapped, flatten_obs=False, flatten_actions=True)
    wrapped = NormalizeObservations(wrapped)
    wrapped = InfoToObservation(wrapped)
    wrapped = FixedResetMode(wrapped, mode='grid')
    wrapped = TimeLimit(wrapped,
                        max_episode_steps=self._env_config.eval_time_limit)
    gym_env = ActionRepeat(wrapped, n=self._env_config.action_repeat)
    dm_env = GymWrapper(environment=gym_env)
    dm_env = wrappers.SinglePrecisionWrapper(dm_env)
    # expose the gym-level env for callers that need direct access
    dm_env.gym_env = gym_env
    return dm_env
def create_goal_gym_env(**kwargs):
    """Build a goal-based gym env with a flattened observation vector.

    Consumed kwargs:
        name (str): gym environment id (required).
        frames (int): stack this many frames when > 1 (default 1).
        limit_steps (bool): wrap with LimitStepsWrapper (default False).
    All remaining kwargs are forwarded to ``gym.make``.
    """
    frames = kwargs.pop('frames', 1)
    name = kwargs.pop('name')
    limit_steps = kwargs.pop('limit_steps', False)

    environment = FlattenObservation(
        FilterObservation(gym.make(name, **kwargs),
                          ['observation', 'desired_goal']))
    if frames > 1:
        environment = wrappers.FrameStack(environment, frames, False)
    if limit_steps:
        environment = wrappers.LimitStepsWrapper(environment)
    return environment
def main():
    """Train/evaluate SAC on a goal env, either through GDADS skills or flat.

    Loads a saved SAC model if one exists on disk, otherwise trains and
    saves one; then (in GDADS mode) evaluates on the dict env and renders.
    """
    # Experiment switches (hard-coded here rather than CLI flags).
    as_gdads = True
    name = "pointmass"
    drop_abs_position = True

    dads_env_fn = envs_fns[name]
    conf: Conf = CONFS[name]

    # Build the dict-observation env with an episode time limit.
    dict_env = as_dict_env(dads_env_fn())
    dict_env = TimeLimit(dict_env, max_episode_steps=conf.ep_len)
    if drop_abs_position:
        dict_env = DropGoalEnvsAbsoluteLocation(dict_env)

    if as_gdads:
        # GDADS path: the SkillWrapper handles observation flattening itself.
        flat_env = SkillWrapper(env=dict_env, skill_reset_steps=conf.ep_len // 2)
    else:
        # Plain path: filter + flatten the dict observation.
        flat_obs_content = ["observation", "desired_goal", "achieved_goal"]
        if drop_abs_position:
            flat_obs_content.remove("achieved_goal")  # Because always 0 vector
        flat_env = FlattenObservation(
            FilterObservation(dict_env, filter_keys=flat_obs_content))
    flat_env = TransformReward(flat_env, f=lambda r: r*conf.reward_scaling)
    flat_env = Monitor(flat_env)

    filename = f"modelsCommandSkills/{name}-gdads{as_gdads}"
    if os.path.exists(filename + ".zip"):
        # Resume from a previously saved model (SAC.load appends ".zip").
        sac = SAC.load(filename, env=flat_env)
        if as_gdads:
            # NOTE(review): presumably restores SkillWrapper state saved below
            # — confirm against SkillWrapper.load.
            flat_env.load(filename)
    else:
        sac = SAC("MlpPolicy", env=flat_env, verbose=1, learning_rate=conf.lr,
                  tensorboard_log=f"(unknown)-tb", buffer_size=10000)
        train(model=sac, conf=conf, save_fname=filename)
        if as_gdads:
            flat_env.save(filename)

    if as_gdads:
        flat_env.set_sac(sac)
        eval_dict_env(dict_env=dict_env,
                      model=flat_env,
                      ep_len=conf.ep_len)
    show(model=sac, env=flat_env, conf=conf)
def make_robotics_env(env_id, seed, rank=0):
    """Create a wrapped, monitored gym.Env for MuJoCo robotics tasks.

    Works with both modern Gym (FilterObservation + FlattenObservation)
    and older gym releases (FlattenDictWrapper).
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    keys = ['observation', 'desired_goal']
    # TODO: remove try-except once most users are running modern Gym
    try:
        # modern Gym (>=0.15.4)
        from gym.wrappers import FilterObservation, FlattenObservation
        env = FlattenObservation(FilterObservation(env, keys))
    except ImportError:
        # older gym (<=0.15.3)
        from gym.wrappers import FlattenDictWrapper  # pytype:disable=import-error
        env = FlattenDictWrapper(env, keys)
    monitor_path = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, monitor_path, info_keywords=('is_success', ))
    env.seed(seed)
    return env
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0, gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None, env_kwargs=None, logger_dir=None, initializer=None):
    """Build a single monitored env for the given env family.

    Dispatches on ``env_type`` ('atari', 'retro', 'robotics', or anything
    else for plain gym), optionally flattens Dict observations, seeds the
    env per (seed, subrank), and attaches a Monitor plus family-specific
    post-wrappers.
    """
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}

    # 'module:EnvId' form: import the module so its envs get registered.
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*', '', env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)

    # Construct the base env per family.
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(
            game=env_id,
            max_episode_steps=10000,
            use_restricted_actions=retro.Actions.DISCRETE,
            state=gamestate)
    elif env_type == 'robotics':
        env = gym.make(env_id, **env_kwargs)
        env = DoneOnSuccessWrapper(env)
    else:
        env = gym.make(env_id, **env_kwargs)

    # Flatten Dict observation spaces; robotics envs keep only the
    # goal-relevant keys before flattening.
    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        if env_type == 'robotics':
            env = FlattenObservation(
                FilterObservation(env, ['observation', 'desired_goal']))
        else:
            env = FlattenObservation(env)

    # Per-subrank seeding so parallel workers differ deterministically.
    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    # Family-specific post-processing wrappers.
    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
def test_nested_dicts_ravel(self, observation_space, flat_shape):
    """Reset through filter+flatten wrappers: obs shape must match the space."""
    env = FakeEnvironment(observation_space=observation_space)
    flat_env = FlattenObservation(FilterObservation(env, env.obs_keys))
    first_obs = flat_env.reset()
    assert first_obs.shape == flat_env.observation_space.shape
def load(environment_name, env_id=None, concat_desired_goal=True, discount=1.0, max_episode_steps=None, sparse_reward=False, use_success_wrapper=True, gym_env_wrappers=(), alf_env_wrappers=(), wrap_with_process=False):
    """Loads the selected environment and wraps it with the specified wrappers.

    Note that by default a ``TimeLimit`` wrapper is used to limit episode
    lengths to the default benchmarks defined by the registered environments.

    Args:
        environment_name: Name for the environment to load.
        env_id: A scalar ``Tensor`` of the environment ID of the time step.
        concat_desired_goal (bool): If True, filter the dict observation to
            'observation' + 'desired_goal' and flatten it into one vector.
        discount: Discount to use for the environment.
        max_episode_steps: If None the ``max_episode_steps`` will be set to
            the default step limit defined in the environment's spec. No
            limit is applied if set to 0 or if there is no
            ``timestep_limit`` set in the environment's spec.
        sparse_reward (bool): If True, the game ends once the goal is
            achieved. Rewards will be added by 1, changed from -1/0 to 0/1.
        use_success_wrapper (bool): If True, wraps the environment with the
            SuccessWrapper which will record Success info after a specified
            amount of timesteps.
        gym_env_wrappers: Iterable with references to wrapper classes to use
            directly on the gym environment.
        alf_env_wrappers: Iterable with references to wrapper classes to use
            on the torch environment.
        wrap_with_process (bool): If True, run the env in a separate process.

    Returns:
        An AlfEnvironment instance.
    """
    assert (environment_name.startswith("Fetch")
            or environment_name.startswith("HandManipulate")), (
        "This suite only supports OpenAI's Fetch and ShadowHand envs!")

    _unwrapped_env_checker_.check_and_update(wrap_with_process)

    gym_spec = gym.spec(environment_name)
    env = gym_spec.make()

    # Resolve the episode step limit from the spec when not given explicitly;
    # 0 means "no limit".
    if max_episode_steps is None:
        if gym_spec.max_episode_steps is not None:
            max_episode_steps = gym_spec.max_episode_steps
        else:
            max_episode_steps = 0

    # NOTE(review): env_ctor closes over `env`, which is rebound by the
    # wrapper code below before env_ctor is ever called — so the ctor wraps
    # the fully-wrapped env, not the bare one. In the wrap_with_process
    # branch this closure (and the env it captures) is handed to a separate
    # process; verify that is intended.
    def env_ctor(env_id=None):
        return suite_gym.wrap_env(
            env,
            env_id=env_id,
            discount=discount,
            max_episode_steps=max_episode_steps,
            gym_env_wrappers=gym_env_wrappers,
            alf_env_wrappers=alf_env_wrappers,
            image_channel_first=False)

    # concat robot's observation and the goal location
    if concat_desired_goal:
        keys = ["observation", "desired_goal"]
        try:
            # for modern Gym (>=0.15.4)
            from gym.wrappers import FilterObservation, FlattenObservation
            env = FlattenObservation(FilterObservation(env, keys))
        except ImportError:
            # for older gym (<=0.15.3)
            from gym.wrappers import FlattenDictWrapper  # pytype:disable=import-error
            env = FlattenDictWrapper(env, keys)

    if use_success_wrapper:
        env = SuccessWrapper(env, max_episode_steps)
    env = ObservationClipWrapper(env)
    if sparse_reward:
        env = SparseReward(env)

    if wrap_with_process:
        process_env = process_environment.ProcessEnvironment(
            functools.partial(env_ctor))
        process_env.start()
        torch_env = alf_wrappers.AlfEnvironmentBaseWrapper(process_env)
    else:
        torch_env = env_ctor(env_id=env_id)
    return torch_env
def train(self):
    """Train the network with HER-style experience relabeling.

    For each episode: roll out up to ``self.n_time_steps`` steps, store the
    real transitions, then relabel each transition ``self.K`` times with a
    future achieved state as the goal (reward 0 when reached, -1 otherwise).
    The model is persisted every 10 epochs.
    """
    for epoch in range(self.n_epochs):
        for episode in range(self.n_episodes):
            done = False
            score = 0
            episode_experience = []

            # Reset the environment to its initial state. The env returns a
            # dict observation ('observation'/'achieved_goal'/'desired_goal').
            # BUG FIX: the old code wrapped this *dict* in
            # FlattenObservation(FilterObservation(...)) — those are env
            # wrappers and cannot be applied to an observation sample; the
            # loop below indexes the dict directly, so the wrapping is gone.
            observation = self.env.reset()

            # Finite number of timesteps per episode because we are not
            # working with a continuous action space; otherwise this loop
            # would be `while not done:`.
            for _ in range(self.n_time_steps):
                self.env.render()
                action = self.act(observation['observation'])
                print(action)
                new_observation, reward, done, info = self.env.step(action)
                score += reward
                episode_experience.append(
                    (observation['observation'], action, reward,
                     new_observation['observation'], done))
                self.save(np.asarray(observation['observation']), action,
                          reward, new_observation['observation'], done)
                observation = new_observation
                self.learn()
                # break if we finish the environment
                if done:
                    break

            # HER: relabel each stored transition K times using a future
            # achieved state from the same episode as the goal.
            for t in range(len(episode_experience)):
                for _ in range(self.K):
                    future = np.random.randint(t, len(episode_experience))
                    goal = episode_experience[future][3]
                    state = episode_experience[t][0]
                    action = episode_experience[t][1]
                    next_state = episode_experience[t][3]
                    done = np.array_equal(next_state, goal)
                    reward = 0 if done else -1
                    self.save(state, action, reward, next_state, done)

        # Save the model every 10 epochs (arbitrary cadence, may change).
        # BUG FIX: `self.save_model` was referenced but never *called*.
        if epoch % 10 == 0 and epoch > 0:
            self.save_model()
def make_toylab_dads_env(**kwargs):
    """Build the toy-lab DADS env: ordered-dict obs, keep only
    'achieved_goal', flatten, then wrap for DADS.

    Extra kwargs are forwarded to DADSWrapper.
    """
    base = DADSCustomToyLabEnv()
    ordered = ObsAsOrderedDict(base)
    filtered = FilterObservation(ordered, filter_keys=["achieved_goal"])
    flat = FlattenObservation(filtered)
    return DADSWrapper(flat, **kwargs)
def _process_goalenv(env: GoalEnv, **kwargs):
    """Strip a GoalEnv down to its 'observation' key, flatten it, and wrap
    for DADS. Extra kwargs are forwarded to DADSWrapper."""
    observation_only = FilterObservation(env, filter_keys=["observation"])
    flat = FlattenObservation(observation_only)
    return DADSWrapper(flat, **kwargs)
import multiprocessing
import neat
import numpy as np
import os
import pickle
import random
import time

import gym  # BUG FIX: gym is used below (gym.make, gym.wrappers) but was never imported
from gym.wrappers import FlattenObservation, FilterObservation

import visualize

NUM_CORES = 1

# Module-level env used by the NEAT evaluation: dense-reward FetchReach with
# the dict observation filtered to goal-relevant keys and flattened.
env = gym.make('FetchReach-v1')
env.env.reward_type = 'dense'
env = FlattenObservation(FilterObservation(env, ['observation', 'desired_goal']))

print("action space: {0!r}".format(env.action_space))
print("observation space: {0!r}".format(env.observation_space))

env = gym.wrappers.Monitor(env, 'results', force=True)


class RoboGenome(neat.DefaultGenome):
    """NEAT genome extended with a per-genome reward discount factor."""

    def __init__(self, key):
        super().__init__(key)
        # Discount is assigned when the genome is configured (configure_new).
        self.discount = None

    def configure_new(self, config):
        """Initialize attributes; discount drawn uniformly from [0.01, 0.99)."""
        super().configure_new(config)
        self.discount = 0.01 + 0.98 * random.random()
def flatten_env(dict_env, drop_abs_position):
    """Filter a dict-observation env to goal-relevant keys and flatten.

    :param dict_env: env with a Dict observation space.
    :param drop_abs_position: when True, omit 'achieved_goal'
        (it is always a zero vector in that configuration).
    :return: the filtered + flattened env.
    """
    keys = ["observation", "desired_goal"]
    if not drop_abs_position:
        keys.append("achieved_goal")
    return FlattenObservation(FilterObservation(dict_env, filter_keys=keys))
def _make_flat(*args, **kargs):
    """Flatten a dict-observation env, supporting old and new Gym APIs.

    Uses the legacy ``FlattenDictWrapper`` when it is available at module
    level (gym <= 0.15.3); otherwise composes ``FilterObservation`` +
    ``FlattenObservation`` (gym >= 0.15.4). All arguments are forwarded.
    """
    # BUG FIX: `dir()` with no argument lists *local* names — inside this
    # function that is only `args`/`kargs`, so the legacy branch was
    # unreachable even when FlattenDictWrapper had been imported. Check the
    # module globals instead.
    if "FlattenDictWrapper" in globals():
        return FlattenDictWrapper(*args, **kargs)
    return FlattenObservation(FilterObservation(*args, **kargs))