def _expand_node(
        self,
        trees: _MCTSTree,
        n,  # n-th expansion, zero-based
        to_plays,
        model_output: ModelOutput,
        dirichlet_alpha=None,
        exploration_fraction=0.):
    """Store the model output of the ``n``-th expansion into ``trees``.

    Every value is written in place at index ``[:, n]`` of the matching
    buffer of ``trees``. The optional buffers ``game_over``, ``reward`` and
    ``action`` are only written when they are not ``None``.

    Args:
        trees (_MCTSTree): tree buffers to be updated in place.
        n (int): zero-based index of the expansion.
        to_plays: written to ``trees.to_play`` when this is a two-player
            game.
        model_output (ModelOutput): source of ``game_over``, ``state``,
            ``reward``, ``actions`` and ``action_probs``.
        dirichlet_alpha (float): scale of the concentration vector of the
            Dirichlet noise. Only used if ``exploration_fraction > 0``.
        exploration_fraction (float): weight of the Dirichlet noise when
            it is mixed with the prior. No noise is added unless it is
            positive.
    """
    # Same index as ``[:, n]``.
    column = (slice(None), n)

    if self._is_two_player_game:
        trees.to_play[column] = to_plays
    if trees.game_over is not None:
        trees.game_over[column] = model_output.game_over

    def _write_state(buffer, value):
        buffer[column] = value

    nest.map_structure(_write_state, trees.model_state, model_output.state)

    if trees.reward is not None:
        trees.reward[column] = model_output.reward
    if trees.action is not None:
        trees.action[column] = model_output.actions

    prior = model_output.action_probs
    if exploration_fraction > 0.:
        num_rows = model_output.action_probs.shape[0]
        concentration = dirichlet_alpha * torch.ones(trees.branch_factor)
        noise = td.Dirichlet(concentration).sample((num_rows, ))
        # Entries whose prior is exactly zero receive no noise; the
        # remaining noise is renormalized along dim 1.
        noise = noise * (prior != 0)
        noise = noise / noise.sum(dim=1, keepdim=True)
        prior = exploration_fraction * noise + (
            1 - exploration_fraction) * prior
    trees.prior[column] = prior
def test_transform_nest(self):
    """Exercise ``transform_nest`` and ``py_map_structure_with_path``."""
    # Transform a tensor stored in a dict field of a namedtuple.
    source = NTuple(
        a=dict(x=torch.zeros(()), y=torch.zeros((2, 4))),
        b=torch.zeros((4, )))
    result = transform_nest(source, field='a.x', func=lambda x: x + 1.0)
    source.a.update({'x': torch.ones(())})
    nest.map_structure(self.assertEqual, result, source)

    # Transform a leaf reached through several nested namedtuples.
    source = NTuple(
        a=dict(x=torch.zeros(()), y=torch.zeros((2, 4))),
        b=NTuple(a=torch.zeros((4, )), b=NTuple(a=[1], b=[1])))
    result = transform_nest(source, field='b.b.b', func=lambda _: [2])
    innermost = source.b.b._replace(b=[2])
    middle = source.b._replace(b=innermost)
    expected = source._replace(b=middle)
    nest.map_structure(self.assertEqual, result, expected)

    # ``None`` as the field: the result is expected to be the sum 1 + 2.
    source = NTuple(a=1, b=2)
    result = transform_nest(source, None, NestSum())
    self.assertEqual(result, 3)

    # Every path handed to the callback must index back to its element.
    pairs = [("a", 12), ("b", 13)]
    nested = collections.OrderedDict(pairs)

    def _check_path(path, e):
        self.assertEqual(nested[path], e)

    res = nest.py_map_structure_with_path(_check_path, nested)
    nest.assert_same_structure(nested, res)
def testResetSavesCurrentTimeStep(self):
    """``current_time_step()`` must equal the time step returned by ``reset()``."""
    env = RandomAlfEnvironment(
        observation_spec=BoundedTensorSpec((1, ), torch.int32),
        action_spec=BoundedTensorSpec((1, ), torch.int64))

    reset_step = env.reset()
    saved_step = env.current_time_step()

    nest.map_structure(self.assertEqual, reset_step, saved_step)
def _obtain_zero_info(self):
    """Build an all-zero env info with the structure of a real one.

    The gym env is reset, stepped once with a zero action to obtain a
    sample info, and reset again. The zeroed info is meant to be used as a
    placeholder for ``FIRST`` time steps.

    Returns:
        nested array: zeros shaped like the info returned by the env.
    """
    env = self._gym_env
    env.reset()
    zero_action = nest.map_structure(lambda spec: spec.numpy_zeros(),
                                     self._action_spec)
    _obs, _reward, _done, sample_info = env.step(zero_action)
    # Undo the effect of the probing step.
    env.reset()
    return nest.map_structure(np.zeros_like, _as_array(sample_info))
def rsample_action_distribution(nested_distributions):
    """Draw reparameterized samples (``rsample``) from nested distributions.

    Args:
        nested_distributions (nested Distribution): action distributions.
            Each of them must report ``has_rsample`` as true.
    Returns:
        nest with the same structure holding the rsampled actions.
    """
    rsample_flags = nest.map_structure(lambda d: d.has_rsample,
                                       nested_distributions)
    assert all(nest.flatten(rsample_flags)), (
        "all the distributions need to support rsample in order to enable "
        "backpropagation")

    def _rsample(dist):
        return dist.rsample()

    return nest.map_structure(_rsample, nested_distributions)
def _generate_time_step(batched,
                        observation,
                        step_type,
                        discount,
                        prev_action=None,
                        action_spec=None,
                        reward=None,
                        reward_spec=ts.TensorSpec(()),
                        env_id=None,
                        env_info=None):
    """Assemble a ``TimeStep`` from an observation and optional fields.

    The array backend is chosen from the observation: numpy if every leaf
    of ``observation`` is a numpy array, otherwise torch (all leaves must
    then be tensors). Missing fields are filled with defaults created by
    that backend.

    Args:
        batched (bool): if True, the first dimension of the first
            observation leaf is used as the batch size.
        observation (nested array or Tensor): stored as is.
        step_type: value used to fill the int32 ``step_type`` field.
        discount: converted to float32 and multiplied with ones of the
            outer shape.
        prev_action (nested array or Tensor): if None, zeros are created
            from ``action_spec``.
        action_spec (nested TensorSpec): only used if ``prev_action`` is
            None.
        reward: converted to float32. If None, zeros of shape
            ``outer_dims + reward_spec.shape`` are used.
        reward_spec (TensorSpec): only used if ``reward`` is None.
        env_id: if None, ``arange(batch_size)`` (batched) or a scalar zero
            (unbatched), both int32.
        env_info: stored as is. If None, a new empty dict is used for each
            call so that time steps never share one default dict.
    Returns:
        TimeStep:
    """
    if env_info is None:
        # A fresh dict per call instead of one mutable default shared by
        # every TimeStep created without an explicit ``env_info``.
        env_info = {}

    flat_observation = nest.flatten(observation)
    if all(map(_is_numpy_array, flat_observation)):
        md = np
        if reward is not None:
            reward = np.float32(reward)
        discount = np.float32(discount)
    else:
        assert all(
            map(torch.is_tensor,
                flat_observation)), ("Elements in observation must be Tensor")
        md = torch
        if reward is not None:
            reward = to_tensor(reward, dtype=torch.float32)
        discount = to_tensor(discount, dtype=torch.float32)

    if batched:
        batch_size = flat_observation[0].shape[0]
        outer_dims = (batch_size, )
        if env_id is None:
            env_id = md.arange(batch_size, dtype=md.int32)
        if reward is not None:
            assert reward.shape[:1] == outer_dims
        if prev_action is not None:
            flat_action = nest.flatten(prev_action)
            assert flat_action[0].shape[:1] == outer_dims
    else:
        outer_dims = ()
        if env_id is None:
            env_id = md.zeros((), dtype=md.int32)

    step_type = md.full(outer_dims, step_type, dtype=md.int32)
    if reward is None:
        reward = md.zeros(outer_dims + reward_spec.shape, dtype=md.float32)
    # Expands ``discount`` to the outer shape.
    discount = md.ones(outer_dims, dtype=md.float32) * discount
    if prev_action is None:
        prev_action = nest.map_structure(
            lambda spec: md.zeros(
                outer_dims + spec.shape,
                dtype=getattr(md, ts.torch_dtype_to_str(spec.dtype))),
            action_spec)

    return TimeStep(
        step_type,
        reward,
        discount,
        observation,
        prev_action,
        env_id,
        env_info=env_info)
def epsilon_greedy_sample(nested_distributions, eps=0.1):
    """Sample actions that are greedy with probability ``1 - eps``.

    Args:
        nested_distributions (nested Distribution): distribution to sample
            from
        eps (float): a floating value in :math:`[0,1]`, the chance of
            keeping a sampled action instead of the mode. With ``eps >= 1``
            all actions are sampled; with ``eps == 0`` all actions are the
            mode.
    Returns:
        (nested) Tensor:
    """
    if eps >= 1.0:
        return sample_action_distribution(nested_distributions)

    def _mostly_greedy(dist):
        # pytorch distribution has no 'mode' operation
        mode_action = get_mode(dist)
        if eps == 0.0:
            return mode_action
        action = dist.sample()
        # One random draw per entry along dim 0 decides which entries of
        # the sample are overwritten by the mode.
        use_mode = torch.rand(action.shape[0]) > eps
        action[use_mode] = mode_action[use_mode]
        return action

    return nest.map_structure(_mostly_greedy, nested_distributions)
def array_to_tensor(data):
    """Convert numpy arrays and numpy scalars in a nest to CPU tensors.

    Args:
        data (nest): leaves of any type.
    Returns:
        nest with the same structure; other leaves are returned unchanged.
    """

    def _convert(leaf):
        if isinstance(leaf, (np.ndarray, np.number)):
            return torch.as_tensor(leaf, device='cpu')
        return leaf

    return nest.map_structure(_convert, data)
def _step(self, action):
    """Step the wrapped gym env and package the result as a time step.

    Args:
        action: the action forwarded to the gym env.
    Returns:
        the result of ``self.reset()`` if auto reset is enabled and the
        previous step ended the episode; otherwise a termination time step
        when the env reports done, or a transition time step.
    """
    # Automatically reset the environments on step if they need to be reset.
    if self._auto_reset and self._done:
        return self.reset()

    raw_observation, reward, done, info = self._gym_env.step(action)
    self._done = done
    self._info = info

    observation = self._to_spec_dtype_observation(raw_observation)
    self._info = nest.map_structure(_as_array, self._info)

    if not self._done:
        return ds.transition(
            observation,
            action,
            reward,
            self._reward_spec,
            self._discount,
            self._env_id,
            env_info=self._info)
    return ds.termination(
        observation,
        action,
        reward,
        self._reward_spec,
        self._env_id,
        env_info=self._info)
def zeros_from_spec(nested_spec, batch_size):
    """Create nested zero Tensors or Distributions from specs.

    The specs are first converted to parameter specs, zeros are created
    for each of them, and the result is converted back to distributions
    according to ``nested_spec``.

    Args:
        nested_spec (nested TensorSpec or DistributionSpec):
        batch_size (int|tuple|list): batch size/shape passed to
            ``spec.zeros()``. A non-iterable value is wrapped into a list
            with one element.
    Returns:
        nested Tensor or Distribution
    """
    outer_shape = batch_size if isinstance(batch_size,
                                           Iterable) else [batch_size]
    param_spec = dist_utils.to_distribution_param_spec(nested_spec)
    zero_params = nest.map_structure(lambda spec: spec.zeros(outer_shape),
                                     param_spec)
    return dist_utils.params_to_distributions(zero_params, nested_spec)
def tensor_to_array(data):
    """Convert every tensor in a nest to a numpy array.

    Args:
        data (nest): leaves of any type.
    Returns:
        nest with the same structure, where each tensor is replaced by a
        numpy array. Other leaves are returned unchanged.
    """

    def _tensor_to_array(obj):
        if not torch.is_tensor(obj):
            return obj
        # ``Tensor.numpy()`` raises for a tensor that requires grad, so
        # detach it from the autograd graph before converting.
        return obj.detach().cpu().numpy()

    return nest.map_structure(_tensor_to_array, data)
def cpu(self):
    """Get the cpu version of this data structure.

    The result is stored on ``self._cpu`` and reused by later calls.
    """
    cached = getattr(self, "_cpu", None)
    if cached is not None:
        return cached

    def _to_cpu(x):
        # Leaves that are not tensors are kept as they are.
        return x.cpu() if isinstance(x, torch.Tensor) else x

    cached = nest.map_structure(_to_cpu, self)
    self._cpu = cached
    return cached
def _as_array(nested):
    """Wrap every number in ``nested`` into an ``np.ndarray``.

    Leaves that are not instances of ``numbers.Number`` are kept unchanged.
    """

    def _number_to_array(leaf):
        return np.array(leaf) if isinstance(leaf, numbers.Number) else leaf

    return nest.map_structure(_number_to_array, nested)
def sample_action_distribution(nested_distributions):
    """Draw samples with ``sample()`` from each distribution in a nest.

    Unlike ``rsample()``, this sampling is not meant for backpropagation.

    Args:
        nested_distributions (nested Distribution): action distributions.
    Returns:
        nest with the same structure holding the sampled actions.
    """

    def _sample(dist):
        return dist.sample()

    return nest.map_structure(_sample, nested_distributions)
def detach(nests):
    """Call ``detach()`` on every leaf of a nest.

    Args:
        nests (nested Tensor): tensors to be detached
    Returns:
        detached Tensors with same structure as nests
    """

    def _detach(tensor):
        return tensor.detach()

    return nest.map_structure(_detach, nests)
def _unstack_actions(self, batched_actions):
    """Split a (possibly nested) batch of actions into individual actions.

    All leaves are moved to CPU first. When ``self._flatten`` is set, the
    result is a ``zip`` over the flattened leaves, i.e. each item is a
    tuple of flat leaves. Otherwise each item is packed back into the
    structure of ``batched_actions`` and a list is returned.
    """
    cpu_actions = nest.map_structure(lambda x: x.cpu(), batched_actions)
    per_action_leaves = zip(*nest.flatten(cpu_actions))
    if self._flatten:
        return per_action_leaves
    return [
        nest.pack_sequence_as(cpu_actions, leaves)
        for leaves in per_action_leaves
    ]
def _stack_time_steps(self, time_steps):
    """Given a list of TimeStep, combine to one with a batch dimension.

    When the default device is ``"cuda"``, the stacked result is moved to
    cuda and the stacked CPU version is attached to it as ``_cpu``.
    """

    def _stack(*arrays):
        return torch.stack(arrays)

    if self._flatten:
        stacked = nest.fast_map_structure_flatten(
            _stack, self._time_step_with_env_info_spec, *time_steps)
    else:
        stacked = nest.fast_map_structure(_stack, *time_steps)

    if alf.get_default_device() != "cuda":
        return stacked

    cuda_stacked = nest.map_structure(lambda x: x.cuda(), stacked)
    cuda_stacked._cpu = stacked
    return cuda_stacked
def _check_action_specs_for_critic_networks(action_spec,
                                            action_input_processors,
                                            action_preprocessing_combiner):
    """Assert that the action specs are usable by a critic network.

    Two conditions are asserted: a combiner must be given if
    ``action_spec`` has more than one leaf, and every discrete action spec
    must have an input processor that is not None.

    Args:
        action_spec (nested spec): each leaf provides ``is_discrete``.
        action_input_processors (nest): same structure as ``action_spec``.
            If None, every spec is treated as having no processor.
        action_preprocessing_combiner: must not be None if there are
            multiple action specs.
    """
    has_multiple_specs = len(nest.flatten(action_spec)) > 1
    if has_multiple_specs:
        assert action_preprocessing_combiner is not None, (
            "An action combiner is needed when there are multiple action specs:"
            " {}".format(action_spec))

    def _check_individual(spec, proc):
        if not spec.is_discrete:
            return
        assert proc is not None, (
            'CriticNetwork only supports continuous actions. One of given '
            + 'action specs {} is discrete. Use QNetwork instead. '.format(
                spec) +
            'Alternatively, specify `action_input_processors` to transform '
            + 'discrete actions to continuous action embeddings first.')

    processors = action_input_processors
    if processors is None:
        processors = nest.map_structure(lambda _: None, action_spec)

    nest.map_structure(_check_individual, action_spec, processors)
def compute_entropy(distributions):
    """Sum the entropies of all distributions in a nest.

    Args:
        distributions (nested Distribution): A possibly batched tuple of
            distributions.
    Returns:
        the sum of ``entropy()`` over all the distributions.
    """
    per_distribution = nest.map_structure(lambda dist: dist.entropy(),
                                          distributions)
    return sum(nest.flatten(per_distribution))
def forward(self, observation, state=()):
    """Computes an action distribution given an observation.

    The observation is encoded once and the encoding is fed to every
    projection network.

    Args:
        observation (torch.Tensor): consistent with ``input_tensor_spec``
        state: empty for API consistent with ``ActorRNNDistributionNetwork``
    Returns:
        act_dist (torch.distributions): action distribution
        state: the state returned by the encoding network
    """
    encoding, state = self._encoding_net(observation, state)

    def _project(proj):
        # Only the first output of a projection network is used.
        return proj(encoding)[0]

    return nest.map_structure(_project, self._projection_net), state
def test_prune_nest_like(self, prune_nest_like, error):
    """Check ``prune_nest_like`` on namedtuples, lists and mismatched dicts.

    Args:
        prune_nest_like (Callable): the implementation under test.
        error: exception expected for nests whose dict keys do not match.
    """
    # Fields missing from the spec are dropped from the result.
    full_nest = NTuple(
        a=dict(x=torch.zeros(()), y=torch.zeros((2, 4))),
        b=NTuple(a=torch.zeros((4, )), b=[1]))
    spec = NTuple(a=dict(y=TensorSpec(())), b=NTuple(b=[TensorSpec(())]))
    expected = NTuple(a=dict(y=torch.zeros((2, 4))), b=NTuple(b=[1]))
    nest.map_structure(self.assertEqual, prune_nest_like(full_nest, spec),
                       expected)

    # ``None`` in the second nest ends up in the result.
    pruned = prune_nest_like([1, 3], [None, 1])
    self.assertEqual(pruned, [None, 3])

    # A custom ``value_to_match`` plays the same role as ``None`` above.
    pruned = prune_nest_like(
        NTuple(a=1, b=2), NTuple(b=1, a=()), value_to_match=())
    self.assertEqual(pruned, NTuple(a=(), b=2))

    # Dicts with different keys must be rejected.
    with self.assertRaises(error):
        prune_nest_like(dict(x=1, y=2), dict(x=1, z=2))
def __init__(self,
             gym_env,
             env_id=None,
             discount=1.0,
             auto_reset=True,
             simplify_box_bounds=True):
    """
    Args:
        gym_env (gym.Env): An instance of OpenAI gym environment.
        env_id (int): (optional) ID of the environment. 0 is used if None.
        discount (float): Discount to use for the environment.
        auto_reset (bool): whether or not to reset the environment when done.
        simplify_box_bounds (bool): whether or not to simplify redundant
            arrays to values for spec bounds.
    """
    super(AlfGymWrapper, self).__init__()

    self._gym_env = gym_env
    self._discount = discount
    self._env_id = np.int32(0 if env_id is None else env_id)
    self._action_is_discrete = isinstance(self._gym_env.action_space,
                                          gym.spaces.Discrete)
    # TODO: Add test for auto_reset param.
    self._auto_reset = auto_reset

    def _spec_from_space(space):
        return tensor_spec_from_gym_space(space, simplify_box_bounds)

    self._observation_spec = _spec_from_space(
        self._gym_env.observation_space)
    self._action_spec = _spec_from_space(self._gym_env.action_space)
    # Envs without a ``reward_space`` get a scalar reward spec.
    if not hasattr(self._gym_env, "reward_space"):
        self._reward_spec = TensorSpec(())
    else:
        self._reward_spec = _spec_from_space(self._gym_env.reward_space)

    self._time_step_spec = ds.time_step_spec(
        self._observation_spec, self._action_spec, self._reward_spec)
    self._info = None
    # Starting as done makes the first step reset when auto reset is on.
    self._done = True
    self._zero_info = self._obtain_zero_info()
    self._env_info_spec = nest.map_structure(TensorSpec.from_array,
                                             self._zero_info)
def compute_log_probability(distributions, actions):
    """Sum the log probabilities of nested actions under nested distributions.

    Args:
        distributions: A possibly batched tuple of distributions.
        actions: A possibly batched action tuple. It must have the same
            structure as ``distributions``.
    Returns:
        Tensor: the sum of ``log_prob()`` over all (distribution, action)
        pairs.
    """
    nest.assert_same_structure(distributions, actions)
    per_action = nest.map_structure(
        lambda dist, action: dist.log_prob(action), distributions, actions)
    return sum(nest.flatten(per_action))
def _to_spec_dtype_observation(self, observation):
    """Cast observation leaves to the dtypes of the observation spec.

    Args:
        observation (nested arrays or tensors): observations from env.
    Returns:
        A (nested) arrays of observation. A leaf whose dtype already
        matches its spec is returned as is; others go through ``astype()``.
    """

    def _cast(arr, spec):
        target = torch_dtype_to_str(spec.dtype)
        return arr if str(arr.dtype) == target else arr.astype(target)

    return nest.map_structure(_cast, observation, self._observation_spec)
def encode_step(self, inputs, state: MBPState):
    """Calculate latent vector.

    Args:
        inputs (tuple): a tuple of ``(observation, prev_action)``.
        state (MBPState): RNN state
    Returns:
        AlgStep: the result of the VAE ``train_step()`` with its ``state``
        replaced by the next ``MBPState``.
    """
    observation, prev_action = inputs
    self._memory.from_states(state.memory)

    action_embedding = self._action_encoder(prev_action)[0]
    rnn_input = torch.cat(
        [state.latent_vector, action_embedding, state.mem_readout], dim=-1)
    rnn_output, rnn_state = self._rnn(rnn_input, state.rnn_state)

    # The memory is read before the previous latent vector is written.
    mem_readout = self._memory.genkey_and_read(self._key_net, rnn_output)
    self._memory.write(state.latent_vector.detach())

    def _encode(encoder, obs):
        return encoder(obs)[0]

    current_input = map_structure(_encode, self._encoders, observation)
    vae_step = self._vae.train_step(((rnn_output, mem_readout),
                                     current_input))

    return vae_step._replace(
        state=MBPState(
            latent_vector=vae_step.output,
            mem_readout=mem_readout,
            rnn_state=rnn_state,
            memory=self._memory.states))
def _create_projection_net(self, discrete_projection_net_ctor,
                           continuous_projection_net_ctor):
    """Create one projection network per action spec.

    Discrete specs use ``discrete_projection_net_ctor`` and all other
    specs use ``continuous_projection_net_ctor``, so the resulting nest
    can be a mixture of both kinds.
    """

    def _create(spec):
        ctor = (discrete_projection_net_ctor
                if spec.is_discrete else continuous_projection_net_ctor)
        return ctor(
            input_size=self._encoding_net.output_spec.shape[0],
            action_spec=spec)

    self._projection_net = nest.map_structure(_create, self._action_spec)
    if not nest.is_nested(self._projection_net):
        return
    # need this for torch to pickup the parameters of all the modules
    self._projection_net_module_list = nn.ModuleList(
        nest.flatten(self._projection_net))
def to_distribution_param_spec(nests):
    """Replace each ``DistributionSpec`` in a nest by its parameter specs.

    Args:
        nests (nested DistributionSpec of TensorSpec): Each
            ``DistributionSpec`` is replaced by its ``input_params_spec``.
            ``TensorSpec`` leaves are kept unchanged.
    Returns:
        nested TensorSpec: a nest with the converted leaves.
    Raises:
        ValueError: if a leaf is neither a ``DistributionSpec`` nor a
            ``TensorSpec``.
    """

    def _convert(spec):
        if isinstance(spec, DistributionSpec):
            return spec.input_params_spec
        if isinstance(spec, TensorSpec):
            return spec
        raise ValueError("Only TensorSpec or DistributionSpec is allowed "
                         "in nest, got %s. nest is %s" % (spec, nests))

    return nest.map_structure(_convert, nests)
def extract_spec(nests, from_dim=1):
    """Build a ``TensorSpec`` or ``DistributionSpec`` for every leaf of a nest.

    Args:
        nests (nested structure): each leaf is a Tensor or a Distribution.
        from_dim (int): ignore dimension before this when constructing the
            spec. The default of 1 skips the first dimension.
    Returns:
        nest: each leaf is the spec of the corresponding leaf of ``nests``.
    Raises:
        ValueError: if a leaf is neither a Tensor nor a Distribution.
    """

    def _spec_of(leaf):
        if isinstance(leaf, torch.Tensor):
            return TensorSpec.from_tensor(leaf, from_dim)
        if isinstance(leaf, td.Distribution):
            return DistributionSpec.from_distribution(leaf, from_dim)
        raise ValueError("Unsupported value type: %s" % type(leaf))

    return nest.map_structure(_spec_of, nests)
def distributions_to_params(nests):
    """Convert distributions to their parameters, and keep tensors unchanged.

    Args:
        nests (nested Distribution and Tensor): Each ``Distribution`` is
            replaced by the result of ``extract_distribution_parameters()``.
            ``Tensor`` leaves are kept unchanged.
    Returns:
        nested Tensor: a nest where each ``Distribution`` of ``nests`` is
        replaced by its extracted parameters.
    Raises:
        ValueError: if a leaf is neither a ``Distribution`` nor a
            ``Tensor``.
    """

    def _to_params(dist_or_tensor):
        if isinstance(dist_or_tensor, td.Distribution):
            return extract_distribution_parameters(dist_or_tensor)
        elif isinstance(dist_or_tensor, torch.Tensor):
            return dist_or_tensor
        else:
            # The two literals form ONE message. A comma between them
            # would pass two separate arguments to ValueError and the
            # error would be displayed as a tuple.
            raise ValueError(
                "Only Tensor or Distribution is allowed in nest, "
                "got %s. nest is %s" % (dist_or_tensor, nests))

    return nest.map_structure(_to_params, nests)
def _tensor_to_array(data):
    """Convert nested tensors to numpy arrays after squeezing dim 0.

    Each leaf has its dimension 0 squeezed and is moved to CPU before it is
    converted with ``numpy()``.
    """

    def _squeeze_to_numpy(tensor):
        return tensor.squeeze(dim=0).cpu().numpy()

    return nest.map_structure(_squeeze_to_numpy, data)