def make_env(
    id_or_path: str, *, max_episode_timesteps: Optional[int] = None
) -> gym.Env:
    """Create an environment from an id or a path.

    Factories are tried in order: `make_po_env`, then `gym.make` (if the
    former raised `ValueError`), then `make_gv_env` (if `gym.make` raised
    `gym.error.Error`).

    :param id_or_path: environment id, or the argument given to `make_gv_env`
    :param max_episode_timesteps: if not None, the env is wrapped in
        `TimeLimit` with this value
    :raises ValueError: (via `checkraise`) if the created env has no
        `state_space` attribute
    :rtype: the created, possibly wrapped, environment
    """
    try:
        env = make_po_env(id_or_path)
    except ValueError:
        try:
            print('Loading using gym.make')
            env = gym.make(id_or_path)
        except gym.error.Error:
            # adjacent literals are concatenated as-is, so the first one
            # must end with a space to keep the two sentences apart
            print(
                f'Environment with id {id_or_path} not found. '
                'Trying as a GV YAML environment'
            )
            env = make_gv_env(id_or_path)

    checkraise(
        hasattr(env, 'state_space'),
        ValueError,
        f'env {id_or_path} does not have state_space',
    )

    if isinstance(env.unwrapped, gym_pomdps.POMDP):
        env = FlatPaddingWrapper(env)

    if max_episode_timesteps is not None:
        env = TimeLimit(env, max_episode_timesteps)

    return env
def __init__(self, space: gym.spaces.Dict):
    """Store `space` after verifying that it has an `agent` entry.

    :param space: dictionary space; must contain the `agent` key
    :raises KeyError: (via `checkraise`) if `agent` is not in `space.spaces`
    """
    super().__init__()

    has_agent_key = 'agent' in space.spaces
    checkraise(has_agent_key, KeyError, 'space does not contain `agent` key')

    self.space = space
def __init__(self, input_space: gym.spaces.Discrete):
    """Record the number of classes of a Discrete input space.

    :param input_space: must be a `gym.spaces.Discrete`
    :raises TypeError: (via `checkraise`) if `input_space` is not Discrete
    """
    super().__init__()

    is_discrete = isinstance(input_space, gym.spaces.Discrete)
    checkraise(is_discrete, TypeError, 'input_space must be Discrete')

    # double-underscore attribute: name-mangled by the enclosing class
    self.__num_classes = input_space.n
def __init__(self, input_space: gym.spaces.Box):
    """Record the size of a 1-dimensional Box input space.

    :param input_space: must be a `gym.spaces.Box` whose shape has length 1
    :raises TypeError: (via `checkraise`) if `input_space` is not such a Box
    """
    super().__init__()

    is_flat_box = (
        isinstance(input_space, gym.spaces.Box)
        and len(input_space.shape) == 1
    )
    checkraise(is_flat_box, TypeError, 'input_space must be Box')

    # single-element unpacking of the 1-dimensional shape
    shape = input_space.shape
    (self.__out_dim,) = shape
def __init__(self, env: gym.Env):
    """Wrap `env`, enlarging its Discrete observation space by one element.

    :param env: environment whose `observation_space` is Discrete
    :raises ValueError: (via `checkraise`) if the observation space is not
        Discrete
    """
    wrapped_space_is_discrete = isinstance(
        env.observation_space, gym.spaces.Discrete
    )
    checkraise(
        wrapped_space_is_discrete,
        ValueError,
        'env.observation_space must be Discrete',
    )

    super().__init__(env)

    # one more observation than the wrapped env exposes
    num_observations = env.observation_space.n + 1
    self.observation_space = gym.spaces.Discrete(num_observations)
def returns(rewards: np.ndarray, discount: float) -> np.ndarray:
    """Compute discounted empirical returns along the last axis of `rewards`.

    :param rewards: (B, T) np.ndarray of rewards
    :param discount: discount factor
    :rtype: (B,) np.ndarray of empirical returns
    """
    checkraise(
        rewards.ndim > 1,
        ValueError,
        'invalid rewards.ndim {}',
        rewards.ndim,
    )

    horizon = rewards.shape[-1]
    weights = discounts(horizon, discount)

    # contract the last (time) axis against the per-step discount weights
    return np.einsum('j,...j->...', weights, rewards)
def discounts_uncached(num_steps: int, discount: float) -> np.ndarray:
    """Build the discounts array $[1., \\gamma, \\gamma^2, \\ldots]$ from scratch.

    :param num_steps: size of the output array
    :param discount: discount factor
    :rtype: (N,) np.ndarray of discounts
    """
    checkraise(
        num_steps > 0,
        ValueError,
        'invalid `num_steps` {}',
        num_steps,
    )
    checkraise(
        0.0 <= discount <= 1.0,
        ValueError,
        'invalid `discount` {}',
        discount,
    )

    exponents = np.arange(num_steps, dtype=float)
    return np.power(discount, exponents)
def _check_gv_state_space_keys(space: gym.Space) -> bool:
    """Validate that `space` is a Dict space holding the expected GV keys.

    The required keys are `grid`, `agent_id_grid`, `agent` and `item`.

    :param space: candidate state space
    :raises TypeError: (via `checkraise`) if `space` is not a
        `gym.spaces.Dict`
    :raises KeyError: (via `checkraise`) if any required key is missing
    :rtype: True once every check has passed
    """
    checkraise(
        isinstance(space, gym.spaces.Dict),
        TypeError,
        'incorrect state space type',
    )

    for key in ('grid', 'agent_id_grid', 'agent', 'item'):
        checkraise(
            key in space.spaces,
            KeyError,
            f'space does not contain `{key}` key',
        )

    # honor the declared `-> bool` return type instead of falling through
    # with an implicit None
    return True
def mc_q_estimator(
    rewards: torch.Tensor,
    values: torch.Tensor,  # pylint: disable=unused-argument
    *,
    discount: float,
) -> torch.Tensor:
    """Monte Carlo estimate: discounted sum of rewards from each step onward.

    :param rewards: 1-dimensional tensor of rewards
    :param values: unused by this estimator
    :param discount: discount factor
    :rtype: tensor whose entry t is sum over k >= t of
        discount^(k - t) * rewards[k]
    """
    checkraise(rewards.ndim == 1, ValueError, '`rewards` must have 1 dimension')

    num_steps = rewards.size(-1)
    steps = torch.arange(num_steps)

    # offsets[t, k] = k - t
    offsets = steps[None, :] - steps[:, None]
    # upper triangle keeps only the terms with k >= t
    weights = torch.triu(discount**offsets)

    return torch.matmul(weights, rewards)
def __init__(
    self,
    space: gym.spaces.Dict,
    embedding: EmbeddingRepresentation,
):
    """Store `space` and `embedding` after verifying `space` has a `grid` entry.

    :param space: dictionary space; must contain the `grid` key
    :param embedding: embedding representation kept on the instance
    :raises KeyError: (via `checkraise`) if `grid` is not in `space.spaces`
    """
    super().__init__()

    has_grid_key = 'grid' in space.spaces
    checkraise(has_grid_key, KeyError, 'space does not contain `grid` key')

    self.space = space
    self.embedding = embedding
def torch(self) -> Batch:
    """Build a new Batch whose fields are the `numpy2torch` of this one's.

    :raises TypeError: (via `checkraise`) if any of states, observations,
        actions, rewards or dones is not an np.ndarray
    :rtype: Batch with all seven fields converted
    """
    # only these five fields are type-checked before conversion
    checked_fields = ('states', 'observations', 'actions', 'rewards', 'dones')
    all_numpy = all(
        isinstance(getattr(self, field), np.ndarray)
        for field in checked_fields
    )
    checkraise(all_numpy, TypeError, 'Batch is not numpy to begin with??')

    converted_fields = (
        'states',
        'observations',
        'actions',
        'rewards',
        'next_states',
        'next_observations',
        'dones',
    )
    converted = {
        field: numpy2torch(getattr(self, field))
        for field in converted_fields
    }
    return Batch(**converted)
def __init__(self, env: gym.Env, indices: List[int]):
    """Wrap `env`, exposing only the observation dimensions in `indices`.

    The wrapped env's full observation space is kept as `state_space`; the
    new `observation_space` is a Box built from the selected low/high
    bounds.

    :param env: environment whose `observation_space` is a 1-dimensional Box
    :param indices: unique, non-negative positions into the observation
        vector, each lower than its length; must be non-empty
    :raises ValueError: (via `checkraise`) if any of the above is violated
    """
    checkraise(
        isinstance(env.observation_space, gym.spaces.Box)
        and len(env.observation_space.shape) == 1,
        ValueError,
        'env.observation_space must be flat Box',
    )

    # guard first: min()/max() below would otherwise fail on an empty
    # sequence with a message unrelated to the actual problem
    checkraise(
        len(indices) > 0,
        ValueError,
        'indices must be non-empty',
    )

    checkraise(
        len(set(indices)) == len(indices),
        ValueError,
        'indices must be unique',
    )

    checkraise(
        len(indices) <= env.observation_space.shape[0],
        ValueError,
        'number of indices must not exceed state dimensions',
    )

    checkraise(
        min(indices) >= 0,
        ValueError,
        'indices must be non-negative',
    )

    checkraise(
        max(indices) < env.observation_space.shape[0],
        ValueError,
        'indices must be lower than state dimensions',
    )

    super().__init__(env)

    self.indices = indices
    self.state_space = env.observation_space
    self.observation_space = gym.spaces.Box(
        env.observation_space.low[self.indices],
        env.observation_space.high[self.indices],
    )

    self.state = None
def torch(self) -> Episode:
    """Build a new Episode whose fields are the `numpy2torch` of this one's.

    States and observations may each be an np.ndarray or a dict whose values
    are all np.ndarray; actions and rewards must be np.ndarray.

    :raises TypeError: (via `checkraise`) if a field has another type
    :rtype: Episode with all four fields converted
    """

    def is_numpy_like(data) -> bool:
        # a bare array, or a dict made only of arrays
        if isinstance(data, np.ndarray):
            return True
        return isinstance(data, dict) and all(
            isinstance(v, np.ndarray) for v in data.values()
        )

    fields_are_numpy = (
        is_numpy_like(self.states)
        and is_numpy_like(self.observations)
        and isinstance(self.actions, np.ndarray)
        and isinstance(self.rewards, np.ndarray)
    )
    checkraise(
        fields_are_numpy,
        TypeError,
        'Episode is not numpy to begin with??',
    )

    return Episode(
        states=numpy2torch(self.states),
        observations=numpy2torch(self.observations),
        actions=numpy2torch(self.actions),
        rewards=numpy2torch(self.rewards),
    )
def __init__(
    self,
    space: gym.spaces.Dict,
    embedding: EmbeddingRepresentation,
):
    """Store `space` and `embedding`, and build the CNN via `gv_cnn`.

    :param space: dictionary space; must contain the `grid` and
        `agent_id_grid` keys
    :param embedding: embedding representation; its `dim` sets the number
        of CNN input channels
    :raises KeyError: (via `checkraise`) if a required key is missing
    """
    super().__init__()

    subspaces = space.spaces
    checkraise(
        'grid' in subspaces,
        KeyError,
        'space does not contain `grid` key',
    )
    checkraise(
        'agent_id_grid' in subspaces,
        KeyError,
        'space does not contain `agent_id_grid` key',
    )

    self.space = space
    self.embedding = embedding

    # adding one for agent_id_grid
    num_input_channels = 3 * embedding.dim + 1
    self.cnn = gv_cnn(num_input_channels)
def discounts(num_steps: int, discount: float) -> np.ndarray:
    """Return the discounts array $[1., \\gamma, \\gamma^2, \\ldots]$, cached.

    The entry of `discounts_cache` for `discount` is reused when it is long
    enough; otherwise a longer array is computed with `discounts_uncached`
    and stored in the cache.

    :param num_steps: size of the output array
    :param discount: discount factor
    :rtype: (N,) np.ndarray of discounts
    """
    checkraise(
        num_steps > 0,
        ValueError,
        'invalid `num_steps` {}',
        num_steps,
    )
    checkraise(
        0.0 <= discount <= 1.0,
        ValueError,
        'invalid `discount` {}',
        discount,
    )

    cached = discounts_cache[discount]

    if cached.size < num_steps:
        # cache too short: recompute and remember the longer array
        fresh = discounts_uncached(num_steps, discount)
        discounts_cache[discount] = fresh
        return fresh

    # cache long enough: hand out its leading slice
    return cached[:num_steps]
def __init__(self, input_space: gym.spaces.Box, dims: Sequence[int]):
    """Build a stack of `make_module('linear', 'relu', ...)` + ReLU layers.

    :param input_space: `gym.spaces.Box` whose shape has length 1; its size
        is the input dimension of the first layer
    :param dims: non-empty sequence of layer output dimensions
    :raises TypeError: (via `checkraise`) if `input_space` is not such a Box
    :raises ValueError: (via `checkraise`) if `dims` is empty
    """
    super().__init__()

    is_flat_box = (
        isinstance(input_space, gym.spaces.Box)
        and len(input_space.shape) == 1
    )
    checkraise(is_flat_box, TypeError, 'input_space must be Box')
    checkraise(len(dims) > 0, ValueError, 'dims must be non-empty')

    (input_dim,) = input_space.shape
    self.dims = [input_dim, *dims]

    # one (linear, ReLU) pair per consecutive pair of dimensions
    layers = []
    for in_dim, out_dim in mitt.pairwise(self.dims):
        layers.append(make_module('linear', 'relu', in_dim, out_dim))
        layers.append(nn.ReLU())

    self.model = nn.Sequential(*layers)
def filter_models(models: nn.ModuleDict, keys) -> nn.ModuleDict:
    """Select a subset of `models`, optionally recursing into sub-dicts.

    :param models: module dictionary to select from
    :param keys: a list of names to keep, or a dict mapping each name to
        the `keys` used to filter `models[name]` recursively
    :raises ValueError: (via `checkraise`) if a requested name is not in
        `models`
    :raises NotImplementedError: if `keys` is neither a list nor a dict
    :rtype: new `nn.ModuleDict` with the selected entries
    """
    if isinstance(keys, list):
        nested = False
    elif isinstance(keys, dict):
        nested = True
    else:
        raise NotImplementedError

    requested = set(keys.keys()) if nested else set(keys)
    missing_keys = requested - models.keys()
    checkraise(
        not missing_keys,
        ValueError,
        'models dictionary does not contains keys {}',
        missing_keys,
    )

    if nested:
        selected = {
            name: filter_models(models[name], sub_keys)
            for name, sub_keys in keys.items()
        }
    else:
        selected = {name: models[name] for name in keys}

    return nn.ModuleDict(selected)
def td0_q_estimator(
    rewards: torch.Tensor,
    values: torch.Tensor,
    *,
    discount: float,
) -> torch.Tensor:
    """One-step TD estimate: reward plus discounted next-step value.

    :param rewards: 1-dimensional tensor of rewards
    :param values: 1-dimensional tensor of values, same shape as `rewards`
    :param discount: discount factor
    :rtype: tensor whose entry t is rewards[t] + discount * values[t + 1],
        with the value after the last step taken as 0
    """
    checkraise(rewards.ndim == 1, ValueError, '`rewards` must have 1 dimension')
    checkraise(values.ndim == 1, ValueError, '`values` must have 1 dimension')
    checkraise(
        rewards.shape == values.shape,
        ValueError,
        '`rewards` and `values` must have the same shape',
    )

    # shift values one step to the left; the wrapped-around entry is cleared
    next_values = torch.roll(values, -1)
    next_values[-1] = 0.0

    return rewards + discount * next_values
def make_schedule(
    name: str,
    *,
    const: Optional[int] = None,
    value_from: Optional[float] = None,
    value_to: Optional[float] = None,
    nsteps: Optional[int] = None,
    halflife: Optional[int] = None,
) -> Schedule:
    """Create a schedule by name, binding its parameters with `partial`.

    :param name: one of `constant`, `linear`, `exponential`
    :param const: required by `constant`
    :param value_from: required by `linear` and `exponential`
    :param value_to: required by `linear`
    :param nsteps: required by `linear`
    :param halflife: required by `exponential`
    :raises ValueError: if `name` is unknown, or (via `checkraise`) if a
        parameter required by the chosen schedule is None
    :rtype: `functools.partial` of the matching schedule function
    """
    if name == 'constant':
        checkraise(
            const is not None,
            ValueError,
            f'invalid arguments {const}',
        )
        return functools.partial(constant_schedule, const=const)

    if name == 'linear':
        linear_kwargs = {
            'value_from': value_from,
            'value_to': value_to,
            'nsteps': nsteps,
        }
        checkraise(
            None not in tuple(linear_kwargs.values()),
            ValueError,
            f'invalid arguments {value_from} {value_to} {nsteps}',
        )
        return functools.partial(linear_schedule, **linear_kwargs)

    if name == 'exponential':
        exponential_kwargs = {
            'value_from': value_from,
            'halflife': halflife,
        }
        checkraise(
            None not in tuple(exponential_kwargs.values()),
            ValueError,
            f'invalid arguments {value_from} {halflife}',
        )
        return functools.partial(exponential_schedule, **exponential_kwargs)

    raise ValueError(f'invalid schedule name {name}')
def tdn_q_estimator(
    rewards: torch.Tensor,
    values: torch.Tensor,
    *,
    discount: float,
    n: int,
) -> torch.Tensor:
    """n-step TD estimate: n discounted rewards plus a bootstrapped value.

    :param rewards: 1-dimensional tensor of rewards
    :param values: 1-dimensional tensor of values, same shape as `rewards`
    :param discount: discount factor
    :param n: number of reward steps summed before bootstrapping
    :rtype: tensor whose entry t is the discounted sum of rewards[t:t + n]
        plus discount^n * values[t + n], with values past the end taken as 0
    """
    checkraise(rewards.ndim == 1, ValueError, '`rewards` must have 1 dimension')
    checkraise(values.ndim == 1, ValueError, '`values` must have 1 dimension')
    checkraise(
        rewards.shape == values.shape,
        ValueError,
        '`rewards` and `values` must have the same shape',
    )

    num_steps = rewards.size(-1)
    steps = torch.arange(num_steps)

    # offsets[t, k] = k - t
    offsets = steps[None, :] - steps[:, None]
    # keep the band 0 <= k - t <= n - 1
    weights = torch.tril(torch.triu(discount**offsets), n - 1)

    # shift values n steps to the left; the wrapped-around tail is cleared
    bootstrap = torch.roll(values, -n)
    bootstrap[-n:] = 0.0

    return torch.matmul(weights, rewards) + (discount**n) * bootstrap
def tdlambda_q_estimator(
    rewards: torch.Tensor,
    values: torch.Tensor,
    *,
    discount: float,
    lambda_: float,
) -> torch.Tensor:
    """TD(lambda) estimate computed with a single matrix product.

    :param rewards: 1-dimensional tensor of rewards
    :param values: 1-dimensional tensor of values, same shape as `rewards`
    :param discount: discount factor
    :param lambda_: lambda coefficient
    :rtype: tensor whose entry t is the sum over k >= t of
        (discount * lambda_)^(k - t)
        * (rewards[k] + discount * (1 - lambda_) * values[k + 1]),
        with the value after the last step taken as 0
    """
    checkraise(rewards.ndim == 1, ValueError, '`rewards` must have 1 dimension')
    checkraise(values.ndim == 1, ValueError, '`values` must have 1 dimension')
    checkraise(
        rewards.shape == values.shape,
        ValueError,
        '`rewards` and `values` must have the same shape',
    )

    num_steps = rewards.size(-1)
    steps = torch.arange(num_steps)

    # offsets[t, k] = k - t
    offsets = steps[None, :] - steps[:, None]
    # upper triangle keeps only the terms with k >= t
    weights = torch.triu((discount * lambda_)**offsets)

    # shift values one step to the left; the wrapped-around entry is cleared
    next_values = torch.roll(values, -1)
    next_values[-1] = 0.0

    targets = rewards + discount * (1 - lambda_) * next_values
    return torch.matmul(weights, targets)
def _make_gv_model(self, name: str):
    """Instantiate the GV representation registered under `name`.

    Known names: `agent`, `item`, `grid-cnn`, `grid-fc`, `agent-grid-cnn`,
    `agent-grid-fc`. The keys each model needs are checked against
    `self.space.spaces` before the model is built.

    :param name: model name
    :raises KeyError: (via `checkraise`) if a key the model needs is missing
    :raises ValueError: if `name` is not a known model name
    :rtype: the constructed representation
    """
    # name -> (keys required in the space, lazy constructor)
    recipes = {
        'agent': (
            ('agent',),
            lambda: GV_Agent_Representation(self.space),
        ),
        'item': (
            ('item',),
            lambda: GV_Item_Representation(self.space, self.embedding),
        ),
        'grid-cnn': (
            ('grid',),
            lambda: GV_Grid_CNN_Representation(self.space, self.embedding),
        ),
        'grid-fc': (
            ('grid',),
            lambda: GV_Grid_FC_Representation(self.space, self.embedding),
        ),
        'agent-grid-cnn': (
            ('grid', 'agent_id_grid'),
            lambda: GV_AgentGrid_CNN_Representation(
                self.space, self.embedding
            ),
        ),
        'agent-grid-fc': (
            ('grid', 'agent_id_grid'),
            lambda: GV_AgentGrid_FC_Representation(
                self.space, self.embedding
            ),
        ),
    }

    recipe = recipes.get(name)
    if recipe is None:
        raise ValueError(f'invalid gv model name {name}')

    required_keys, build = recipe
    for key in required_keys:
        checkraise(
            key in self.space.spaces,
            KeyError,
            f'space does not contain `{key}` key',
        )

    return build()
def make_po_env(name: str) -> gym.Env:
    """Create a partially observable variant of a gym environment.

    `name` is matched against `po_env_id_re`; the environment is then made
    with `gym.make` and wrapped in an `IndexWrapper` that keeps only the
    observation indices associated with the requested observability type.

    :param name: environment id accepted by `po_env_id_re`
    :raises ValueError: if `name` does not match the regex (via
        `checkraise`), if the environment name is not one of CartPole,
        LunarLander or Acrobot, or (via `checkraise`) if the observability
        type is not one of `pos`, `vel`, `full`
    :rtype: the `IndexWrapper`-wrapped environment
    """
    m = po_env_id_re.match(name)
    checkraise(m is not None, ValueError, f'env name {name} does not satisfy regex')
    assert m is not None  # silly forcing of type checking

    # m[0] is the full name
    po_type = m[1]  # first capture, i.e., the type of partial observability
    env_name = m[2]  # second capture, i.e., the name w/o the version
    version = m[3]  # third capture, i.e., the version

    non_po_name = f'{env_name}-v{version}'

    # per environment: observability type -> observation indices to keep
    indices_by_env = {
        'CartPole': {
            'pos': [0, 2],  # ignore velocities
            'vel': [1, 3],  # ignore positions
            'full': [0, 1, 2, 3],  # ignore nothing
        },
        'LunarLander': {
            'pos': [0, 1, 4, 6, 7],  # ignore velocities
            'vel': [2, 3, 5, 6, 7],  # ignore positions
            'full': [0, 1, 2, 3, 4, 5, 6, 7],  # ignore nothing
        },
        'Acrobot': {
            'pos': [0, 1, 2, 3],  # ignore velocities
            'vel': [4, 5],  # ignore positions
            'full': [0, 1, 2, 3, 4, 5],  # ignore nothing
        },
    }

    indices_dict = indices_by_env.get(env_name)
    if indices_dict is None:
        # f-string so the offending name actually appears in the message
        raise ValueError(f'invalid env name {env_name}')

    checkraise(
        po_type in indices_dict,
        ValueError,
        f'invalid partial observability {po_type}',
    )

    env = gym.make(non_po_name)
    indices = indices_dict[po_type]
    return IndexWrapper(env, indices)