def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    else:
        raise NotImplementedError
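# Minimal usage sketch (an illustrative assumption, not part of the original snippet):
# converting a gym environment's spaces before wrapping it, assuming `convert_gym_space`
# above and its rllab-style Box/Discrete imports are available.
import gym

env = gym.make("CartPole-v1")
obs_space = convert_gym_space(env.observation_space)  # rllab Box with shape (4,)
act_space = convert_gym_space(env.action_space)       # rllab Discrete(n=2)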
def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    else:
        raise NotImplementedError
def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        # Hard-coded to an 80x80 pixel observation box (e.g. downsampled image
        # frames) instead of using the bounds of the original `space`.
        return Box(low=0, high=255, shape=(80, 80))
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    else:
        raise NotImplementedError
def __init__(self, app_name, time_state=False, idx=0, is_render=False,
             no_graphics=False, recording=True):
    Serializable.quick_init(self, locals())
    # Unity scene
    self._env = UnityEnvironment(file_name=app_name, worker_id=idx,
                                 no_graphics=no_graphics)
    self.id = 0
    self.name = app_name
    self.idx = idx
    self.is_render = is_render
    self.time_state = time_state
    self.time_step = 0

    # Check brain configuration
    assert len(self._env.brains) == 1
    self.brain_name = self._env.external_brain_names[0]
    brain = self._env.brains[self.brain_name]

    # Check for number of agents in scene
    initial_info = self._env.reset()[self.brain_name]

    # Visual observations are currently disabled: the trailing `and False`
    # forces use_visual to False even when the brain provides a camera.
    self.use_visual = (brain.number_visual_observations == 1) and False
    self.recording = brain.number_visual_observations == 1 and recording

    # Set observation and action spaces
    if brain.vector_action_space_type == "discrete":
        self._action_space = Discrete(1)
    else:
        high = np.array([np.inf] * brain.vector_action_space_size)
        self._action_space = Box(-high, high)
    # ----------------------------------
    if self.use_visual and False and no_graphics:
        high = np.array([np.inf] * brain.camera_resolutions[0]["height"] *
                        brain.camera_resolutions[0]["width"] * 3)
        self._observation_space = Box(-high, high)
    else:
        if self.time_state:
            # Append one extra dimension for the time feature
            high = np.array([np.inf] * (brain.vector_observation_space_size + 1))
        else:
            high = np.array([np.inf] * brain.vector_observation_space_size)
        self._observation_space = Box(-high, high)
    # video buffer
    self.frames = []
def convert_gym_space(space, box_additional_dim=0):
    if isinstance(space, gym.spaces.Box):
        if box_additional_dim != 0:
            low = np.concatenate([space.low, [-np.inf] * box_additional_dim])
            high = np.concatenate([space.high, [np.inf] * box_additional_dim])
            return Box(low=low, high=high)
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    else:
        raise NotImplementedError
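# Usage sketch (an assumption for illustration): padding a Box with one unbounded
# extra dimension, e.g. to append an auxiliary feature such as time to the observation.
import gym
import numpy as np

base = gym.spaces.Box(low=0.0, high=1.0, shape=(3,))
aug = convert_gym_space(base, box_additional_dim=1)
# aug.low  == [0., 0., 0., -inf]
# aug.high == [1., 1., 1.,  inf]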
def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    elif isinstance(space, list):
        # For multiagent envs: convert each agent's space separately
        return list(map(convert_gym_space, space))
        # TODO(cathywu) refactor multiagent envs to use gym.spaces.Tuple
        # (may be needed for pickling?)
    else:
        raise NotImplementedError
def convert_gym_space(space, n_agents=1):
    if isinstance(space, (gym.spaces.Box, Box)):
        if len(space.shape) > 1:
            assert n_agents == 1, \
                "multi-dimensional inputs for centralized agents not supported"
            return Box(low=np.min(space.low), high=np.max(space.high),
                       shape=space.shape)
        else:
            # Concatenate the per-agent box n_agents times for a centralized agent
            return Box(low=np.min(space.low), high=np.max(space.high),
                       shape=(space.shape[0] * n_agents,))
    elif isinstance(space, (gym.spaces.Discrete, Discrete)):
        # Joint action space: one discrete action per combination of agent actions
        return Discrete(n=space.n ** n_agents)
    else:
        raise NotImplementedError
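# Example sketch (hypothetical spaces, for illustration only): with n_agents=3,
# a per-agent Discrete(4) action space maps to a joint Discrete(4 ** 3) = Discrete(64),
# and a per-agent Box of shape (5,) maps to a concatenated Box of shape (15,).
import gym

joint_act = convert_gym_space(gym.spaces.Discrete(4), n_agents=3)
joint_obs = convert_gym_space(gym.spaces.Box(low=-1.0, high=1.0, shape=(5,)),
                              n_agents=3)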
def convert_gym_space(space): """ Convert a gym.space to an rllab.space :param space: (obj:`gym.Space`) The Space object to convert :return: converted rllab.Space object """ if isinstance(space, gym.spaces.Box): return Box(low=space.low, high=space.high) elif isinstance(space, gym.spaces.Discrete): return Discrete(n=space.n) elif isinstance(space, gym.spaces.Tuple): return Product([convert_gym_space(x) for x in space.spaces]) elif isinstance(space, gym.spaces.Dict): return Dict(space.spaces) else: raise TypeError
def convert_gym_space(space):
    if isinstance(space, gym.spaces.Box):
        return Box(low=space.low, high=space.high)
    elif isinstance(space, gym.spaces.Discrete):
        return Discrete(n=space.n)
    elif isinstance(space, gym.spaces.Tuple):
        return Product([convert_gym_space(x) for x in space.spaces])
    # added for robotics environments: flatten goal-based Dict observations
    # into a single Box in the order desired_goal, achieved_goal, observation
    elif isinstance(space, gym.spaces.Dict):
        b_low = np.concatenate((space.spaces["desired_goal"].low,
                                space.spaces["achieved_goal"].low,
                                space.spaces["observation"].low))
        b_high = np.concatenate((space.spaces["desired_goal"].high,
                                 space.spaces["achieved_goal"].high,
                                 space.spaces["observation"].high))
        return Box(low=b_low, high=b_high)
    # end addition
    else:
        raise NotImplementedError
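# Sketch of the flattening the Dict branch above performs (hypothetical shapes,
# mirroring gym's goal-based robotics environments, which expose 'observation',
# 'achieved_goal', and 'desired_goal' keys):
import gym
import numpy as np

dict_space = gym.spaces.Dict({
    "observation": gym.spaces.Box(-np.inf, np.inf, shape=(10,)),
    "achieved_goal": gym.spaces.Box(-np.inf, np.inf, shape=(3,)),
    "desired_goal": gym.spaces.Box(-np.inf, np.inf, shape=(3,)),
})
flat_box = convert_gym_space(dict_space)  # single Box with 3 + 3 + 10 = 16 dimensions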
class OneCharMemory(Env, SupervisedLearningEnv):
    """
    A simple env whose first observation is a value `X`, followed by a fixed
    number of zeros. The agent receives a reward of 1 at every step where it
    outputs zero, and a large reward at the final step if it outputs `X`.

    Both the actions and observations are represented as one-hot vectors.
    There are `n` different values that `X` can take on (excluding 0), so the
    one-hot vector's dimension is n+1.
    """

    def __init__(self, n=4, num_steps=100, reward_for_remembering=1000):
        """
        :param n: Number of different values that could be returned
        :param num_steps: How many steps the agent needs to remember.
        :param reward_for_remembering: The reward for remembering the number.
        """
        super().__init__()
        self.num_steps = num_steps
        self.n = n
        self._action_space = Discrete(self.n + 1)
        self._observation_space = Discrete(self.n + 1)
        self._t = 1
        self._reward_for_remembering = reward_for_remembering
        self._target = None
        self._next_obs = None

    def step(self, action):
        # `flatten` converts an integer into its one-hot encoding;
        # `unflatten` converts a one-hot encoding back into an integer.
        observation = self._get_next_observation()
        self._next_obs = 0

        done = self._t == self.num_steps
        self._t += 1

        if done:
            reward = self._reward_for_remembering * int(
                self._observation_space.unflatten(action) == self._target)
        else:
            reward = int(self._observation_space.unflatten(action) == 0)
        info = {'target': self._target}
        return observation, reward, done, info

    @property
    def action_space(self):
        return self._action_space

    @property
    def horizon(self):
        return self.num_steps

    def reset(self):
        self._target = randint(1, self.n)
        self._next_obs = self._target
        self._t = 1
        return self._get_next_observation()

    def _get_next_observation(self):
        return self._observation_space.flatten(self._next_obs)

    @property
    def observation_space(self):
        return self._observation_space

    def get_batch(self, batch_size):
        # np.random.randint's `high` is exclusive, so use n + 1 to sample
        # targets in [1, n], matching reset().
        targets = np.random.randint(
            low=1,
            high=self.n + 1,
            size=batch_size,
        )
        onehot_targets = special.to_onehot_n(targets, self.feature_dim)
        X = np.zeros((batch_size, self.sequence_length, self.feature_dim))
        X[:, :, 0] = 1  # default every observation to the one-hot for 0
        X[:, 0, :] = onehot_targets  # show the target at the first time step
        Y = np.zeros((batch_size, self.sequence_length, self.target_dim))
        Y[:, :, 0] = 1  # default every desired output to the one-hot for 0
        Y[:, -1, :] = onehot_targets  # require the target at the last time step
        return X, Y

    @property
    def feature_dim(self):
        return self.n + 1

    @property
    def target_dim(self):
        return self.n + 1

    @property
    def sequence_length(self):
        return self.horizon
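# Minimal rollout sketch (an illustrative assumption, not part of the original file):
# it relies on OneCharMemory's rllab-style dependencies (Env, Discrete, special, and
# SupervisedLearningEnv) being importable; flatten/unflatten map integers to and from
# one-hot vectors, as used inside step() above.
env = OneCharMemory(n=4, num_steps=10)
obs = env.reset()  # one-hot encoding of the target value X
for _ in range(env.horizon):
    action = env.action_space.flatten(0)  # always answer "0"
    obs, reward, done, info = env.step(action)
    if done:
        # At the final step the agent should instead answer with the target
        # to collect reward_for_remembering.
        break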