Example #1
    def _expand_node(
            self,
            trees: _MCTSTree,
            n,  # n-th expansion, zero-based
            to_plays,
            model_output: ModelOutput,
            dirichlet_alpha=None,
            exploration_fraction=0.):
        if self._is_two_player_game:
            trees.to_play[:, n] = to_plays
        if trees.game_over is not None:
            trees.game_over[:, n] = model_output.game_over

        def _set_tree_state(ts, s):
            ts[:, n] = s

        nest.map_structure(_set_tree_state, trees.model_state,
                           model_output.state)
        if trees.reward is not None:
            trees.reward[:, n] = model_output.reward
        if trees.action is not None:
            trees.action[:, n] = model_output.actions
        prior = model_output.action_probs

        if exploration_fraction > 0.:
            batch_size = model_output.action_probs.shape[0]
            noise_dist = td.Dirichlet(
                dirichlet_alpha * torch.ones(trees.branch_factor))
            noise = noise_dist.sample((batch_size, ))
            noise = noise * (prior != 0)
            noise = noise / noise.sum(dim=1, keepdim=True)
            prior = exploration_fraction * noise + (
                1 - exploration_fraction) * prior

        trees.prior[:, n] = prior
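A standalone sketch (not from the source) of the exploration-noise step above: Dirichlet noise is masked to the legal actions (nonzero prior), renormalized, and blended into the prior with weight ``exploration_fraction``. The shapes and the illegal-action mask below are made up for illustration.

import torch
import torch.distributions as td

# Hypothetical setup: a batch of 4 nodes, 6 child actions each.
batch_size, branch_factor = 4, 6
prior = torch.softmax(torch.randn(batch_size, branch_factor), dim=1)
prior[:, -1] = 0.                               # pretend the last action is illegal
prior = prior / prior.sum(dim=1, keepdim=True)

dirichlet_alpha, exploration_fraction = 0.3, 0.25
noise = td.Dirichlet(dirichlet_alpha * torch.ones(branch_factor)).sample((batch_size, ))
noise = noise * (prior != 0)                    # drop noise on illegal actions
noise = noise / noise.sum(dim=1, keepdim=True)  # renormalize what is left
mixed = exploration_fraction * noise + (1 - exploration_fraction) * prior
assert torch.allclose(mixed.sum(dim=1), torch.ones(batch_size))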
Example #2
    def test_transform_nest(self):
        ntuple = NTuple(
            a=dict(x=torch.zeros(()), y=torch.zeros((2, 4))),
            b=torch.zeros((4, )))
        transformed_ntuple = transform_nest(
            ntuple, field='a.x', func=lambda x: x + 1.0)
        ntuple.a.update({'x': torch.ones(())})
        nest.map_structure(self.assertEqual, transformed_ntuple, ntuple)

        ntuple = NTuple(
            a=dict(x=torch.zeros(()), y=torch.zeros((2, 4))),
            b=NTuple(a=torch.zeros((4, )), b=NTuple(a=[1], b=[1])))
        transformed_ntuple = transform_nest(
            ntuple, field='b.b.b', func=lambda _: [2])
        ntuple = ntuple._replace(
            b=ntuple.b._replace(b=ntuple.b.b._replace(b=[2])))
        nest.map_structure(self.assertEqual, transformed_ntuple, ntuple)

        ntuple = NTuple(a=1, b=2)
        transformed_ntuple = transform_nest(ntuple, None, NestSum())
        self.assertEqual(transformed_ntuple, 3)

        tuples = [("a", 12), ("b", 13)]
        nested = collections.OrderedDict(tuples)

        def _check_path(path, e):
            self.assertEqual(nested[path], e)

        res = nest.py_map_structure_with_path(_check_path, nested)
        nest.assert_same_structure(nested, res)
Example #3
    def testResetSavesCurrentTimeStep(self):
        obs_spec = BoundedTensorSpec((1, ), torch.int32)
        action_spec = BoundedTensorSpec((1, ), torch.int64)

        random_env = RandomAlfEnvironment(observation_spec=obs_spec,
                                          action_spec=action_spec)

        time_step = random_env.reset()
        current_time_step = random_env.current_time_step()
        nest.map_structure(self.assertEqual, time_step, current_time_step)
Example #4
 def _obtain_zero_info(self):
     """Get an env info of zeros only once when the env is created.
     This info will be filled in each ``FIRST`` time step as a placeholder.
     """
     self._gym_env.reset()
     action = nest.map_structure(lambda spec: spec.numpy_zeros(),
                                 self._action_spec)
     _, _, _, info = self._gym_env.step(action)
     self._gym_env.reset()
     info = _as_array(info)
     return nest.map_structure(lambda a: np.zeros_like(a), info)
Example #5
def rsample_action_distribution(nested_distributions):
    """Sample actions from distributions with reparameterization-based sampling
        (rsample) to enable backpropagation.
    Args:
        nested_distributions (nested Distribution): action distributions.
    Returns:
        rsampled actions
    """
    assert all(nest.flatten(nest.map_structure(lambda d: d.has_rsample,
                nested_distributions))), \
            ("all the distributions need to support rsample in order to enable "
            "backpropagation")
    return nest.map_structure(lambda d: d.rsample(), nested_distributions)
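A minimal usage sketch (not from the source): for a reparameterizable distribution such as ``Normal``, ``rsample`` keeps the computation graph so gradients reach the distribution parameters, which is what the assertion above guards.

import torch
import torch.distributions as td

loc = torch.zeros(3, requires_grad=True)
dist = td.Normal(loc, torch.ones(3))
action = dist.rsample()            # differentiable w.r.t. loc
action.pow(2).sum().backward()
print(loc.grad)                    # gradients flow back through the sample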
Example #6
def _generate_time_step(batched,
                        observation,
                        step_type,
                        discount,
                        prev_action=None,
                        action_spec=None,
                        reward=None,
                        reward_spec=ts.TensorSpec(()),
                        env_id=None,
                        env_info={}):

    flat_observation = nest.flatten(observation)

    if all(map(_is_numpy_array, flat_observation)):
        md = np
        if reward is not None:
            reward = np.float32(reward)
        discount = np.float32(discount)
    else:
        assert all(
            map(torch.is_tensor,
                flat_observation)), ("Elements in observation must be Tensor")
        md = torch
        if reward is not None:
            reward = to_tensor(reward, dtype=torch.float32)
        discount = to_tensor(discount, dtype=torch.float32)

    if batched:
        batch_size = flat_observation[0].shape[0]
        outer_dims = (batch_size, )
        if env_id is None:
            env_id = md.arange(batch_size, dtype=md.int32)
        if reward is not None:
            assert reward.shape[:1] == outer_dims
        if prev_action is not None:
            flat_action = nest.flatten(prev_action)
            assert flat_action[0].shape[:1] == outer_dims
    else:
        outer_dims = ()
        if env_id is None:
            env_id = md.zeros((), dtype=md.int32)

    step_type = md.full(outer_dims, step_type, dtype=md.int32)
    if reward is None:
        reward = md.zeros(outer_dims + reward_spec.shape, dtype=md.float32)
    discount = md.ones(outer_dims, dtype=md.float32) * discount
    if prev_action is None:
        prev_action = nest.map_structure(
            lambda spec: md.zeros(outer_dims + spec.shape,
                                  dtype=getattr(
                                      md, ts.torch_dtype_to_str(spec.dtype))),
            action_spec)

    return TimeStep(step_type,
                    reward,
                    discount,
                    observation,
                    prev_action,
                    env_id,
                    env_info=env_info)
Example #7
def epsilon_greedy_sample(nested_distributions, eps=0.1):
    """Generate greedy sample that maximizes the probability.
    Args:
        nested_distributions (nested Distribution): distribution to sample from
        eps (float): a floating value in :math:`[0,1]`, representing the probability
            of sampling from the distribution instead of taking the argmax (mode).
            This can help prevent a dead loop in some deterministic environments
            like `Breakout`.
    Returns:
        (nested) Tensor: the sampled (with probability ``eps``) or greedy actions
    """

    def greedy_fn(dist):
        # pytorch distribution has no 'mode' operation
        greedy_action = get_mode(dist)
        if eps == 0.0:
            return greedy_action
        sample_action = dist.sample()
        greedy_mask = torch.rand(sample_action.shape[0]) > eps
        sample_action[greedy_mask] = greedy_action[greedy_mask]
        return sample_action

    if eps >= 1.0:
        return sample_action_distribution(nested_distributions)
    else:
        return nest.map_structure(greedy_fn, nested_distributions)
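A standalone sketch of the masking trick above (``get_mode`` is not shown in this snippet; the argmax of a ``Categorical`` stands in for it here): each batch element keeps the greedy action with probability ``1 - eps`` and the sampled action otherwise.

import torch
import torch.distributions as td

eps = 0.1
dist = td.Categorical(probs=torch.tensor([[0.1, 0.7, 0.2]]).expand(5, 3))
greedy_action = dist.probs.argmax(dim=-1)   # stand-in for get_mode(dist)
sample_action = dist.sample()
greedy_mask = torch.rand(sample_action.shape[0]) > eps
sample_action[greedy_mask] = greedy_action[greedy_mask]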
Example #8
def array_to_tensor(data):
    def _array_to_cpu_tensor(obj):
        return torch.as_tensor(
            obj, device='cpu') if isinstance(obj,
                                             (np.ndarray, np.number)) else obj

    return nest.map_structure(_array_to_cpu_tensor, data)
Example #9
    def _step(self, action):
        # Automatically reset the environment on step if it needs to be reset.
        if self._auto_reset and self._done:
            return self.reset()

        observation, reward, self._done, self._info = self._gym_env.step(
            action)
        observation = self._to_spec_dtype_observation(observation)
        self._info = nest.map_structure(_as_array, self._info)

        if self._done:
            return ds.termination(
                observation,
                action,
                reward,
                self._reward_spec,
                self._env_id,
                env_info=self._info)
        else:
            return ds.transition(
                observation,
                action,
                reward,
                self._reward_spec,
                self._discount,
                self._env_id,
                env_info=self._info)
Example #10
def zeros_from_spec(nested_spec, batch_size):
    """Create nested zero Tensors or Distributions.

    A zero tensor with ``shape[0]=batch_size`` is created for each ``TensorSpec``, and
    a distribution with all the parameters as zero Tensors is created for each
    ``DistributionSpec``.

    Args:
        nested_spec (nested TensorSpec or DistributionSpec):
        batch_size (int|tuple|list): batch size (or shape) prepended as the outer
            dimension(s) to the shapes in ``TensorSpec``
    Returns:
        nested Tensor or Distribution
    """
    if isinstance(batch_size, Iterable):
        shape = batch_size
    else:
        shape = [batch_size]

    def _zero_tensor(spec):
        return spec.zeros(shape)

    param_spec = dist_utils.to_distribution_param_spec(nested_spec)
    params = nest.map_structure(_zero_tensor, param_spec)
    return dist_utils.params_to_distributions(params, nested_spec)
Example #11
def tensor_to_array(data):
    def _tensor_to_array(obj):
        if torch.is_tensor(obj):
            return obj.cpu().numpy()
        else:
            return obj

    return nest.map_structure(_tensor_to_array, data)
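A minimal usage sketch (assuming the ``nest`` utilities used above, e.g. ``alf.nest``, are importable): tensor leaves are moved to CPU and converted to numpy arrays, while non-tensor leaves pass through unchanged.

import numpy as np
import torch

data = dict(obs=torch.ones(2, 3), info="unchanged")
arrays = tensor_to_array(data)
assert isinstance(arrays['obs'], np.ndarray)
assert arrays['info'] == "unchanged"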
Example #12
 def cpu(self):
     """Get the cpu version of this data structure."""
     r = getattr(self, "_cpu", None)
     if r is None:
         r = nest.map_structure(
             lambda x: x.cpu() if isinstance(x, torch.Tensor) else x, self)
         self._cpu = r
     return r
Example #13
def _as_array(nested):
    """Convert numbers in ``nested`` to np.ndarray."""
    def __as_array(x):
        if isinstance(x, numbers.Number):
            return np.array(x)
        return x

    return nest.map_structure(__as_array, nested)
Example #14
def sample_action_distribution(nested_distributions):
    """Sample actions from distributions with conventional sampling without
        enabling backpropagation.
    Args:
        nested_distributions (nested Distribution): action distributions.
    Returns:
        sampled actions
    """
    return nest.map_structure(lambda d: d.sample(), nested_distributions)
Example #15
def detach(nests):
    """Detach nested Tensors.

    Args:
        nests (nested Tensor): tensors to be detached
    Returns:
        detached Tensors with same structure as nests
    """
    return nest.map_structure(lambda t: t.detach(), nests)
Example #16
 def _unstack_actions(self, batched_actions):
     """Returns a list of actions from potentially nested batch of actions."""
     batched_actions = nest.map_structure(lambda x: x.cpu(),
                                          batched_actions)
     flattened_actions = nest.flatten(batched_actions)
     if self._flatten:
         unstacked_actions = zip(*flattened_actions)
     else:
         unstacked_actions = [
             nest.pack_sequence_as(batched_actions, actions)
             for actions in zip(*flattened_actions)
         ]
     return unstacked_actions
Example #17
 def _stack_time_steps(self, time_steps):
     """Given a list of TimeStep, combine to one with a batch dimension."""
     if self._flatten:
         stacked = nest.fast_map_structure_flatten(
             lambda *arrays: torch.stack(arrays),
             self._time_step_with_env_info_spec, *time_steps)
     else:
         stacked = nest.fast_map_structure(
             lambda *arrays: torch.stack(arrays), *time_steps)
     if alf.get_default_device() == "cuda":
         cpu = stacked
         stacked = nest.map_structure(lambda x: x.cuda(), cpu)
         stacked._cpu = cpu
     return stacked
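A simplified sketch of the stacking idea (assuming ``nest.map_structure`` accepts multiple nests, as ``alf.nest`` does elsewhere in these examples): every leaf across a list of time steps is stacked along a new batch dimension.

import torch

steps = [dict(obs=torch.zeros(3), reward=torch.tensor(0.)) for _ in range(4)]
stacked = nest.map_structure(lambda *xs: torch.stack(xs), *steps)
print(stacked['obs'].shape)        # torch.Size([4, 3])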
Example #18
def _check_action_specs_for_critic_networks(action_spec,
                                            action_input_processors,
                                            action_preprocessing_combiner):

    if len(nest.flatten(action_spec)) > 1:
        assert action_preprocessing_combiner is not None, (
            "An action combiner is needed when there are multiple action specs:"
            " {}".format(action_spec))

    def _check_individual(spec, proc):
        if spec.is_discrete:
            assert proc is not None, (
                'CriticNetwork only supports continuous actions. One of the given '
                + 'action specs {} is discrete. Use QNetwork instead. '.format(
                    spec) +
                'Alternatively, specify `action_input_processors` to transform '
                + 'discrete actions to continuous action embeddings first.')

    if action_input_processors is None:
        action_input_processors = nest.map_structure(lambda _: None,
                                                     action_spec)

    nest.map_structure(_check_individual, action_spec, action_input_processors)
Example #19
def compute_entropy(distributions):
    """Computes total entropy of nested distribution.
    Args:
        distributions (nested Distribution): A possibly batched tuple of
            distributions.
    Returns:
        Tensor: total entropy summed over the nested distributions
    """
    def _compute_entropy(dist: td.Distribution):
        entropy = dist.entropy()
        return entropy

    entropies = nest.map_structure(_compute_entropy, distributions)
    total_entropies = sum(nest.flatten(entropies))
    return total_entropies
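A minimal usage sketch (assuming ``alf.nest`` is importable so the helper above runs): the per-leaf entropies are summed, giving one total entropy per batch element.

import torch
import torch.distributions as td

dists = dict(
    move=td.Categorical(logits=torch.zeros(4, 3)),
    gaze=td.Normal(torch.zeros(4), torch.ones(4)))
total = compute_entropy(dists)
print(total.shape)                 # torch.Size([4])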
Example #20
    def forward(self, observation, state=()):
        """Computes an action distribution given an observation.

        Args:
            observation (torch.Tensor): consistent with ``input_tensor_spec``
            state: empty; kept for API consistency with ``ActorRNNDistributionNetwork``

        Returns:
            act_dist (torch.distributions): action distribution
            state: empty
        """
        encoding, state = self._encoding_net(observation, state)
        act_dist = nest.map_structure(lambda proj: proj(encoding)[0],
                                      self._projection_net)
        return act_dist, state
Example #21
    def test_prune_nest_like(self, prune_nest_like, error):
        ntuple = NTuple(
            a=dict(x=torch.zeros(()), y=torch.zeros((2, 4))),
            b=NTuple(a=torch.zeros((4, )), b=[1]))
        spec = NTuple(a=dict(y=TensorSpec(())), b=NTuple(b=[TensorSpec(())]))
        pruned_ntuple = prune_nest_like(ntuple, spec)

        nest.map_structure(
            self.assertEqual, pruned_ntuple,
            NTuple(a=dict(y=torch.zeros((2, 4))), b=NTuple(b=[1])))

        lst1 = [1, 3]
        lst2 = [None, 1]
        pruned_lst = prune_nest_like(lst1, lst2)
        self.assertEqual(pruned_lst, [None, 3])

        tuple1 = NTuple(a=1, b=2)
        tuple2 = NTuple(b=1, a=())
        pruned_lst = prune_nest_like(tuple1, tuple2, value_to_match=())
        self.assertEqual(pruned_lst, NTuple(a=(), b=2))

        d1 = dict(x=1, y=2)
        d2 = dict(x=1, z=2)
        self.assertRaises(error, prune_nest_like, d1, d2)
Example #22
    def __init__(self,
                 gym_env,
                 env_id=None,
                 discount=1.0,
                 auto_reset=True,
                 simplify_box_bounds=True):
        """

        Args:
            gym_env (gym.Env): An instance of OpenAI gym environment.
            env_id (int): (optional) ID of the environment.
            discount (float): Discount to use for the environment.
            auto_reset (bool): whether or not to reset the environment when done.
            simplify_box_bounds (bool): whether or not to simplify redundant
                arrays to values for spec bounds.

        """
        super(AlfGymWrapper, self).__init__()

        self._gym_env = gym_env
        self._discount = discount
        if env_id is None:
            env_id = 0
        self._env_id = np.int32(env_id)
        self._action_is_discrete = isinstance(self._gym_env.action_space,
                                              gym.spaces.Discrete)
        # TODO: Add test for auto_reset param.
        self._auto_reset = auto_reset
        self._observation_spec = tensor_spec_from_gym_space(
            self._gym_env.observation_space, simplify_box_bounds)
        self._action_spec = tensor_spec_from_gym_space(
            self._gym_env.action_space, simplify_box_bounds)
        if hasattr(self._gym_env, "reward_space"):
            self._reward_spec = tensor_spec_from_gym_space(
                self._gym_env.reward_space, simplify_box_bounds)
        else:
            self._reward_spec = TensorSpec(())
        self._time_step_spec = ds.time_step_spec(
            self._observation_spec, self._action_spec, self._reward_spec)
        self._info = None
        self._done = True
        self._zero_info = self._obtain_zero_info()

        self._env_info_spec = nest.map_structure(TensorSpec.from_array,
                                                 self._zero_info)
Example #23
def compute_log_probability(distributions, actions):
    """Computes log probability of actions given distribution.

    Args:
        distributions: A possibly batched tuple of distributions.
        actions: A possibly batched action tuple.

    Returns:
        Tensor: the log probability summed over the nested actions, one value
        per batch element.
    """
    def _compute_log_prob(single_distribution, single_action):
        single_log_prob = single_distribution.log_prob(single_action)
        return single_log_prob

    nest.assert_same_structure(distributions, actions)
    log_probs = nest.map_structure(_compute_log_prob, distributions, actions)
    total_log_probs = sum(nest.flatten(log_probs))
    return total_log_probs
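A minimal usage sketch (assuming ``alf.nest`` is importable so the helper above runs): the distribution and action nests must share the same structure, and the per-leaf log-probabilities are summed into one value per batch element.

import torch
import torch.distributions as td

dists = dict(move=td.Categorical(logits=torch.zeros(4, 3)),
             gaze=td.Normal(torch.zeros(4), torch.ones(4)))
actions = dict(move=torch.zeros(4, dtype=torch.int64),
               gaze=torch.zeros(4))
print(compute_log_probability(dists, actions).shape)   # torch.Size([4])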
Example #24
    def _to_spec_dtype_observation(self, observation):
        """Make sure observation from env is converted to the correct dtype.

        Args:
            observation (nested arrays or tensors): observations from env.

        Returns:
            A (nested) array of observations cast to the spec dtypes.
        """
        def _as_spec_dtype(arr, spec):
            dtype = torch_dtype_to_str(spec.dtype)
            if str(arr.dtype) == dtype:
                return arr
            else:
                return arr.astype(dtype)

        return nest.map_structure(_as_spec_dtype, observation,
                                  self._observation_spec)
Example #25
    def encode_step(self, inputs, state: MBPState):
        """Calculate latent vector.

        Args:
            inputs (tuple): a tuple of ``(observation, prev_action)``.
            state (MBPState): RNN state
        Returns:
            AlgStep:
            - output: latent vector
            - state: next_state
            - info (LossInfo): loss
        """
        observation, prev_action = inputs
        self._memory.from_states(state.memory)

        prev_action = self._action_encoder(prev_action)[0]

        prev_rnn_input = torch.cat(
            [state.latent_vector, prev_action, state.mem_readout], dim=-1)

        prev_rnn_output, prev_rnn_state = self._rnn(prev_rnn_input,
                                                    state.rnn_state)

        prev_mem_readout = self._memory.genkey_and_read(
            self._key_net, prev_rnn_output)

        self._memory.write(state.latent_vector.detach())

        prior_input = (prev_rnn_output, prev_mem_readout)

        current_input = map_structure(lambda encoder, obs: encoder(obs)[0],
                                      self._encoders, observation)

        vae_step = self._vae.train_step((prior_input, current_input))

        next_state = MBPState(
            latent_vector=vae_step.output,
            mem_readout=prev_mem_readout,
            rnn_state=prev_rnn_state,
            memory=self._memory.states)

        return vae_step._replace(state=next_state)
Example #26
    def _create_projection_net(self, discrete_projection_net_ctor,
                               continuous_projection_net_ctor):
        """If there are :math:`N` action specs, then create :math:`N` projection
        networks which can be a mixture of categoricals and normals.
        """
        def _create(spec):
            if spec.is_discrete:
                net = discrete_projection_net_ctor(
                    input_size=self._encoding_net.output_spec.shape[0],
                    action_spec=spec)
            else:
                net = continuous_projection_net_ctor(
                    input_size=self._encoding_net.output_spec.shape[0],
                    action_spec=spec)
            return net

        self._projection_net = nest.map_structure(_create, self._action_spec)
        if nest.is_nested(self._projection_net):
            # need this for torch to pick up the parameters of all the modules
            self._projection_net_module_list = nn.ModuleList(
                nest.flatten(self._projection_net))
Example #27
def to_distribution_param_spec(nests):
    """Convert the ``DistributionSpecs`` in nests to their parameter specs.

    Args:
        nests (nested DistributionSpec or TensorSpec): Each ``DistributionSpec``
            will be converted to a dictionary of the spec of its input ``Tensor``
            parameters.
    Returns:
        nested TensorSpec: Each leaf is a ``TensorSpec`` or a ``dict``
        corresponding to one distribution, with keys as parameter name and
        values as ``TensorSpecs`` for the parameters.
    """
    def _to_param_spec(spec):
        if isinstance(spec, DistributionSpec):
            return spec.input_params_spec
        elif isinstance(spec, TensorSpec):
            return spec
        else:
            raise ValueError("Only TensorSpec or DistributionSpec is allowed "
                             "in nest, got %s. nest is %s" % (spec, nests))

    return nest.map_structure(_to_param_spec, nests)
Example #28
def extract_spec(nests, from_dim=1):
    """
    Extract ``TensorSpec`` or ``DistributionSpec`` for each element of a nested
    structure. It assumes that the first dimension of each element is the batch
    size.

    Args:
        nests (nested structure): each leaf node of the nested structure is a
            Tensor or Distribution of the same batch size.
        from_dim (int): ignore dimension before this when constructing the spec.
    Returns:
        nest: each leaf node of the returned nested spec is the corresponding
        spec (excluding batch size) of the element of ``nest``.
    """
    def _extract_spec(obj):
        if isinstance(obj, torch.Tensor):
            return TensorSpec.from_tensor(obj, from_dim)
        elif isinstance(obj, td.Distribution):
            return DistributionSpec.from_distribution(obj, from_dim)
        else:
            raise ValueError("Unsupported value type: %s" % type(obj))

    return nest.map_structure(_extract_spec, nests)
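A minimal usage sketch (assuming the ``TensorSpec`` used above exposes a ``shape`` attribute, as other snippets here suggest): the batch dimension is dropped and the remaining dimensions become the spec.

import torch

specs = extract_spec(dict(x=torch.zeros(8, 3, 5)))
print(specs['x'].shape)            # (3, 5): the batch dimension of 8 is dropped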
Example #29
def distributions_to_params(nests):
    """Convert distributions to its parameters, and keep tensors unchanged.
    Only returns parameters that have ``Tensor`` values.

    Args:
        nests (nested Distribution and Tensor): Each ``Distribution`` will be
            converted to dictionary of its ``Tensor`` parameters.
    Returns:
        nested Tensor/Distribution: Each leaf is a ``Tensor`` or a ``dict``
        corresponding to one distribution, with keys as parameter name and
        values as tensors containing parameter values.
    """
    def _to_params(dist_or_tensor):
        if isinstance(dist_or_tensor, td.Distribution):
            return extract_distribution_parameters(dist_or_tensor)
        elif isinstance(dist_or_tensor, torch.Tensor):
            return dist_or_tensor
        else:
            raise ValueError(
                "Only Tensor or Distribution is allowed in nest, "
                "got %s. nest is %s" % (dist_or_tensor, nests))

    return nest.map_structure(_to_params, nests)
Example #30
def _tensor_to_array(data):
    return nest.map_structure(lambda x: x.squeeze(dim=0).cpu().numpy(), data)