Example #1
    def __init__(self,
                 input_size,
                 action_spec,
                 logits_init_output_factor=0.1,
                 name="CategoricalProjectionNetwork"):
        """Creates a categorical projection network that outputs a discrete
        distribution over a number of classes.

        Currently there seems to be no need for this class to handle nested
        inputs; if necessary, extend the argument list to support them in the
        future.

        Args:
            input_size (int): the input vector size
            action_spec (BoundedTensorSpec): a tensor spec containing the
                information of the output distribution.
            name (str): name of this network.
        """
        super(CategoricalProjectionNetwork,
              self).__init__(input_tensor_spec=TensorSpec((input_size, )),
                             name=name)

        unique_num_actions = np.unique(action_spec.maximum -
                                       action_spec.minimum + 1)
        if len(unique_num_actions) > 1 or np.any(unique_num_actions <= 0):
            raise ValueError(
                'Bounds on discrete actions must be the same for all '
                'dimensions and have at least 1 action. Projection '
                'Network requires num_actions to be equal across '
                'action dimensions. Implement a more general '
                'categorical projection if you need more flexibility.')

        output_shape = action_spec.shape + (int(unique_num_actions), )
        self._output_shape = output_shape

        self._projection_layer = layers.FC(
            input_size,
            np.prod(output_shape),
            kernel_init_gain=logits_init_output_factor)
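A quick construction sketch of the shape logic above. This is a hedged example, not from the source: the ``alf`` import paths and the ``BoundedTensorSpec`` arguments are assumptions.

    # Hypothetical usage sketch; import paths are assumed.
    from alf.networks import CategoricalProjectionNetwork
    from alf.tensor_specs import BoundedTensorSpec

    # 3 action dimensions, each taking values in {0, ..., 4}.
    action_spec = BoundedTensorSpec((3, ), dtype='int64', minimum=0, maximum=4)
    net = CategoricalProjectionNetwork(input_size=128, action_spec=action_spec)
    # unique_num_actions == 5, so the FC layer outputs 3 * 5 = 15 logits,
    # interpreted as output_shape == action_spec.shape + (5, ) == (3, 5).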
Example #2
    def __init__(self,
                 input_size,
                 action_spec,
                 activation=math_ops.identity,
                 projection_output_init_gain=0.3,
                 std_bias_initializer_value=0.0,
                 squash_mean=True,
                 state_dependent_std=False,
                 std_transform=nn.functional.softplus,
                 scale_distribution=False,
                 dist_squashing_transform=dist_utils.StableTanh(),
                 name="NormalProjectionNetwork"):
        """Creates an instance of NormalProjectionNetwork.

        Currently there seems to be no need for this class to handle nested
        inputs; if necessary, extend the argument list to support them in the
        future.

        Args:
            input_size (int): input vector dimension
            action_spec (TensorSpec): a tensor spec containing the information
                of the output distribution.
            activation (Callable): activation function to use in
                dense layers.
            projection_output_init_gain (float): Output gain for initializing
                action means and std weights.
            std_bias_initializer_value (float): Initial value for the bias of the
                ``std_projection_layer``.
            squash_mean (bool): If True, squash the output mean to fit the
                action spec. If ``scale_distribution`` is also True, this value
                will be ignored.
            state_dependent_std (bool): If True, std will be generated depending
                on the current state; otherwise a global std will be generated
                regardless of the current state.
            std_transform (Callable): Transform to apply to the std, on top of
                ``activation``.
            scale_distribution (bool): Whether or not to scale the output
                distribution to ensure that the output action fits within the
                ``action_spec``. Note that this is different from ``mean_transform``,
                which merely squashes the mean to fit within the spec.
            dist_squashing_transform (td.Transform): A distribution Transform
                which transforms values into :math:`(-1, 1)`. Defaults to
                ``dist_utils.StableTanh()``.
            name (str): name of this network.
        """
        super(NormalProjectionNetwork,
              self).__init__(input_tensor_spec=TensorSpec((input_size, )),
                             name=name)

        assert isinstance(action_spec, TensorSpec)
        assert len(action_spec.shape) == 1, "Only support 1D action spec!"

        self._action_spec = action_spec
        self._mean_transform = math_ops.identity
        self._scale_distribution = scale_distribution

        if squash_mean or scale_distribution:
            assert isinstance(action_spec, BoundedTensorSpec), \
                ("When squashing the mean or scaling the distribution, bounds "
                 + "are required for the action spec!")

            action_high = torch.as_tensor(action_spec.maximum)
            action_low = torch.as_tensor(action_spec.minimum)
            self._action_means = (action_high + action_low) / 2
            self._action_magnitudes = (action_high - action_low) / 2
            # Do not transform mean if scaling distribution
            if not scale_distribution:
                self._mean_transform = (
                    lambda inputs: self._action_means + self._action_magnitudes
                    * inputs.tanh())
            else:
                self._transforms = [
                    dist_squashing_transform,
                    dist_utils.AffineTransform(loc=self._action_means,
                                               scale=self._action_magnitudes)
                ]

        self._std_transform = math_ops.identity
        if std_transform is not None:
            self._std_transform = std_transform

        self._means_projection_layer = layers.FC(
            input_size,
            action_spec.shape[0],
            activation=activation,
            kernel_init_gain=projection_output_init_gain)

        if state_dependent_std:
            self._std_projection_layer = layers.FC(
                input_size,
                action_spec.shape[0],
                activation=activation,
                kernel_init_gain=projection_output_init_gain,
                bias_init_value=std_bias_initializer_value)
        else:
            self._std = nn.Parameter(
                action_spec.constant(std_bias_initializer_value),
                requires_grad=True)
            self._std_projection_layer = lambda _: self._std
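A minimal sketch of the two bound-handling modes described in the docstring (``squash_mean`` vs. ``scale_distribution``). Import paths and spec arguments are assumptions:

    from alf.networks import NormalProjectionNetwork
    from alf.tensor_specs import BoundedTensorSpec

    # A 4-dim continuous action bounded in [-2, 2].
    action_spec = BoundedTensorSpec((4, ), dtype='float32',
                                    minimum=-2.0, maximum=2.0)

    # Mode 1: only the mean is squashed into the bounds via tanh.
    proj_a = NormalProjectionNetwork(256, action_spec, squash_mean=True)

    # Mode 2: the whole distribution is transformed into the bounds;
    # squash_mean is ignored in this case.
    proj_b = NormalProjectionNetwork(256, action_spec, scale_distribution=True)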
Example #3
File: q_networks.py  Project: soychanq/alf
    def __init__(self,
                 input_tensor_spec: TensorSpec,
                 action_spec: BoundedTensorSpec,
                 input_preprocessors=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 activation=torch.relu_,
                 kernel_initializer=None,
                 name="QNetwork"):
        """Creates an instance of ``QNetwork`` for estimating action-value of
        discrete actions. The action-value is defined as the expected return
        starting from the given input observation and taking the given action.
        It takes an observation as input and outputs an action-value tensor with
        the shape of ``[batch_size, num_of_actions]``.

        Args:
            input_tensor_spec (TensorSpec): the tensor spec of the input
            action_spec (BoundedTensorSpec): the tensor spec of the action
            input_preprocessors (nested InputPreprocessor): a nest of
                ``InputPreprocessor``, each of which will be applied to the
                corresponding input. If not None, then it must
                have the same structure as ``input_tensor_spec`` (after reshaping).
                If any element is None, then it will be treated as ``math_ops.identity``.
                This arg is helpful if you want to have separate preprocessings
                for different inputs by configuring a gin file without changing
                the code. For example, embedding a discrete input before concatenating
                it to another continuous vector.
            preprocessing_combiner (NestCombiner): preprocessing called on
                complex inputs. Note that this combiner must also accept
                ``input_tensor_spec`` as the input to compute the processed
                tensor spec. For example, see ``alf.nest.utils.NestConcat``. This
                arg is helpful if you want to combine inputs by configuring a
                gin file without changing the code.
            conv_layer_params (tuple[tuple]): a tuple of tuples where each
                tuple takes a format ``(filters, kernel_size, strides, padding)``,
                where ``padding`` is optional.
            fc_layer_params (tuple[int]): a tuple of integers representing hidden
                FC layer sizes.
            activation (nn.functional): activation used for hidden layers. The
                last layer will not be activated.
            kernel_initializer (Callable): initializer for all the layers but
                the last layer. If none is provided, a default ``variance_scaling_initializer``
                will be used.
        """
        super(QNetwork, self).__init__(input_tensor_spec, name=name)

        assert len(nest.flatten(action_spec)) == 1, (
            "Currently only support a single discrete action! Use "
            "CriticNetwork instead for multiple actions.")

        num_actions = action_spec.maximum - action_spec.minimum + 1
        self._output_spec = TensorSpec((num_actions, ))

        self._encoding_net = EncodingNetwork(
            input_tensor_spec=input_tensor_spec,
            input_preprocessors=input_preprocessors,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            activation=activation,
            kernel_initializer=kernel_initializer)

        last_kernel_initializer = functools.partial(
            torch.nn.init.uniform_, a=-0.003, b=0.003)

        self._final_layer = layers.FC(
            self._encoding_net.output_spec.shape[0],
            num_actions,
            activation=math_ops.identity,
            kernel_initializer=last_kernel_initializer,
            bias_init_value=-0.2)
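To make the single-discrete-action restriction and the output shape concrete, a hedged usage sketch (import paths and spec constructors assumed):

    from alf.networks import QNetwork
    from alf.tensor_specs import TensorSpec, BoundedTensorSpec

    obs_spec = TensorSpec((8, ), dtype='float32')
    # One scalar discrete action taking 6 values {0, ..., 5}.
    act_spec = BoundedTensorSpec((), dtype='int64', minimum=0, maximum=5)
    q_net = QNetwork(obs_spec, act_spec, fc_layer_params=(100, 100))
    # num_actions == 6, so the output spec is TensorSpec((6, )):
    # a [batch_size, 6] tensor of Q-values at forward time.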
Example #4
    def __init__(self,
                 input_tensor_spec,
                 output_tensor_spec=None,
                 input_preprocessors=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 activation=torch.relu_,
                 kernel_initializer=None,
                 use_fc_bn=False,
                 last_layer_size=None,
                 last_activation=None,
                 last_kernel_initializer=None,
                 last_use_fc_bn=False,
                 name="EncodingNetwork"):
        """
        Args:
            input_tensor_spec (nested TensorSpec): the (nested) tensor spec of
                the input. If nested, then ``preprocessing_combiner`` must not be
                None.
            output_tensor_spec (None|TensorSpec): spec for the output. If None,
                the output tensor spec will be assumed as
                ``TensorSpec((output_size, ))``, where ``output_size`` is
                inferred from network output. Otherwise, the output tensor
                spec will be ``output_tensor_spec`` and the network output
                will be reshaped according to ``output_tensor_spec``.
                Note that ``output_tensor_spec`` is only used for reshaping
                the network outputs for interpretation purpose and is not used
                for specifying any network layers.
            input_preprocessors (nested InputPreprocessor): a nest of
                ``InputPreprocessor``, each of which will be applied to the
                corresponding input. If not None, then it must have the same
                structure as ``input_tensor_spec``. This arg is helpful if you
                want to have separate preprocessings for different inputs by
                configuring a gin file without changing the code. For example,
                embedding a discrete input before concatenating it to another
                continuous vector.
            preprocessing_combiner (NestCombiner): preprocessing called on
                complex inputs. Note that this combiner must also accept
                ``input_tensor_spec`` as the input to compute the processed
                tensor spec. For example, see ``alf.nest.utils.NestConcat``. This
                arg is helpful if you want to combine inputs by configuring a
                gin file without changing the code.
            conv_layer_params (tuple[tuple]): a tuple of tuples where each
                tuple takes a format ``(filters, kernel_size, strides, padding)``,
                where ``padding`` is optional.
            fc_layer_params (tuple[int]): a tuple of integers
                representing FC layer sizes.
            activation (nn.functional): activation used for all the layers but
                the last layer.
            kernel_initializer (Callable): initializer for all the layers but
                the last layer. If None, a ``variance_scaling_initializer`` will
                be used.
            use_fc_bn (bool): whether to use Batch Normalization for FC layers.
            last_layer_size (int): an optional size of an additional layer
                appended at the very end. Note that if ``last_activation`` is
                specified, ``last_layer_size`` has to be specified explicitly.
            last_activation (nn.functional): activation function of the
                additional layer specified by ``last_layer_size``. Note that if
                ``last_layer_size`` is not None, ``last_activation`` has to be
                specified explicitly.
            last_use_fc_bn (bool): whether to use Batch Normalization for the
                last FC layer.
            last_kernel_initializer (Callable): initializer for the
                additional layer specified by ``last_layer_size``.
                If None, it will be the same with ``kernel_initializer``. If
                ``last_layer_size`` is None, ``last_kernel_initializer`` will
                not be used.
            name (str): name of this network.
        """
        super().__init__(input_tensor_spec,
                         input_preprocessors,
                         preprocessing_combiner,
                         name=name)

        if kernel_initializer is None:
            kernel_initializer = functools.partial(
                variance_scaling_init,
                mode='fan_in',
                distribution='truncated_normal',
                nonlinearity=activation)

        self._img_encoding_net = None
        if conv_layer_params:
            assert isinstance(conv_layer_params, tuple), \
                "The input params {} should be tuple".format(conv_layer_params)
            assert len(self._processed_input_tensor_spec.shape) == 3, \
                "The input shape {} should be like (C,H,W)!".format(
                    self._processed_input_tensor_spec.shape)
            input_channels, height, width = self._processed_input_tensor_spec.shape
            self._img_encoding_net = ImageEncodingNetwork(
                input_channels, (height, width),
                conv_layer_params,
                activation=activation,
                kernel_initializer=kernel_initializer,
                flatten_output=True)
            input_size = self._img_encoding_net.output_spec.shape[0]
        else:
            assert self._processed_input_tensor_spec.ndim == 1, \
                "The input shape {} should be like (N,)!".format(
                    self._processed_input_tensor_spec.shape)
            input_size = self._processed_input_tensor_spec.shape[0]

        self._fc_layers = nn.ModuleList()
        if fc_layer_params is None:
            fc_layer_params = []
        else:
            assert isinstance(fc_layer_params, tuple)
            fc_layer_params = list(fc_layer_params)

        for size in fc_layer_params:
            self._fc_layers.append(
                layers.FC(input_size,
                          size,
                          activation=activation,
                          use_bn=use_fc_bn,
                          kernel_initializer=kernel_initializer))
            input_size = size

        if last_layer_size is not None or last_activation is not None:
            assert last_layer_size is not None and last_activation is not None, \
                "Both last_layer_size and last_activation need to be specified!"

            if last_kernel_initializer is None:
                common.warning_once(
                    "last_kernel_initializer is not specified "
                    "for the last layer of size {}.".format(last_layer_size))
                last_kernel_initializer = kernel_initializer

            self._fc_layers.append(
                layers.FC(input_size,
                          last_layer_size,
                          activation=last_activation,
                          use_bn=last_use_fc_bn,
                          kernel_initializer=last_kernel_initializer))
            input_size = last_layer_size

        if output_tensor_spec is not None:
            assert output_tensor_spec.numel == input_size, (
                "network output "
                "size {a} is inconsistent with the specified output_tensor_spec "
                "of size {b}".format(a=input_size, b=output_tensor_spec.numel))
            self._output_spec = TensorSpec(
                output_tensor_spec.shape,
                dtype=self._processed_input_tensor_spec.dtype)
            self._reshape_output = True
        else:
            self._output_spec = TensorSpec(
                (input_size, ), dtype=self._processed_input_tensor_spec.dtype)
            self._reshape_output = False
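A sketch of the conv-then-FC path and the ``last_layer_size``/``last_activation`` pairing described above. The import paths are assumptions and the layer sizes are arbitrary:

    import torch
    from alf.networks import EncodingNetwork
    from alf.tensor_specs import TensorSpec

    obs_spec = TensorSpec((3, 32, 32), dtype='float32')
    enc = EncodingNetwork(
        input_tensor_spec=obs_spec,
        conv_layer_params=((16, 3, 2), (32, 3, 2)),
        fc_layer_params=(256, ),
        last_layer_size=64,          # must be given together with
        last_activation=torch.tanh)  # last_activation, per the assert above
    # enc.output_spec.shape == (64, ). Passing
    # output_tensor_spec=TensorSpec((8, 8)) would additionally reshape the
    # 64-dim output to (8, 8) for interpretation (numel must match).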
Example #5
    def __init__(self,
                 input_size,
                 transconv_layer_params,
                 start_decoding_size,
                 start_decoding_channels,
                 same_padding=False,
                 preprocess_fc_layer_params=None,
                 activation=torch.relu_,
                 kernel_initializer=None,
                 output_activation=torch.tanh,
                 name="ImageDecodingNetwork"):
        """
        Initializes the layers for decoding a latent vector into an image.
        Currently there seems to be no need for this class to handle nested
        inputs; if necessary, extend the argument list to support them in the
        future.

        How to calculate the output size:
        `<https://pytorch.org/docs/stable/nn.html#torch.nn.ConvTranspose2d>`_::

            H = (H1-1) * strides + HF - 2P + OP

        where H = output size, H1 = input size, HF = size of kernel, P = padding,
        OP = output_padding (currently hardcoded to be 0 for this class).

        Regarding padding: in the previous TF version, we had two padding modes:
        ``valid`` and ``same``. For the former, we always have no padding (P=0);
        the latter is also called ``half padding`` (P=(HF-1)//2), and when
        strides=1 and HF is an odd number, the output has the same size as the
        input. Currently, PyTorch doesn't support different left and right
        paddings and P is always (HF-1)//2, so if HF is an even number, the
        output size will increase by 1 when strides=1.

        Args:
            input_size (int): the size of the input latent vector
            transconv_layer_params (tuple[tuple]): a non-empty tuple of tuples,
                each of the form ``(num_filters, kernel_size, strides, padding)``,
                where ``padding`` is optional.
            start_decoding_size (int or tuple): the initial height and width
                we'd like to have for the feature map
            start_decoding_channels (int): the initial number of channels we'd
                like to have for the feature map. Note that we always first
                project an input latent vector into a vector of an appropriate
                length so that it can be reshaped into (``start_decoding_channels``,
                ``start_decoding_height``, ``start_decoding_width``).
            same_padding (bool): similar to TF's conv2d ``same`` padding mode. If
                True, the user provided paddings in ``transconv_layer_params`` will
                be replaced by automatically calculated ones; if False, it
                corresponds to TF's ``valid`` padding mode (the user can still
                provide custom paddings though).
            preprocess_fc_layer_params (tuple[int]): a tuple of fc
                layer units. These fc layers are used for preprocessing the
                latent vector before transposed convolutions.
            activation (nn.functional): activation for hidden layers
            kernel_initializer (Callable): initializer for all the layers.
            output_activation (nn.functional): activation for the output layer.
                Usually our image inputs are normalized to [0, 1] or [-1, 1],
                so this function should be ``torch.sigmoid`` or
                ``torch.tanh``.
            name (str): name of this network.
        """
        super().__init__(input_tensor_spec=TensorSpec((input_size, )),
                         name=name)

        assert isinstance(transconv_layer_params, tuple)
        assert len(transconv_layer_params) > 0

        self._preprocess_fc_layers = nn.ModuleList()
        if preprocess_fc_layer_params is not None:
            for size in preprocess_fc_layer_params:
                self._preprocess_fc_layers.append(
                    layers.FC(input_size,
                              size,
                              activation=activation,
                              kernel_initializer=kernel_initializer))
                input_size = size

        start_decoding_size = common.tuplify2d(start_decoding_size)
        # pytorch assumes "channels_first" !
        self._start_decoding_shape = [
            start_decoding_channels, start_decoding_size[0],
            start_decoding_size[1]
        ]
        self._preprocess_fc_layers.append(
            layers.FC(input_size,
                      np.prod(self._start_decoding_shape),
                      activation=activation,
                      kernel_initializer=kernel_initializer))

        self._transconv_layer_params = transconv_layer_params
        self._transconv_layers = nn.ModuleList()
        in_channels = start_decoding_channels
        for i, paras in enumerate(transconv_layer_params):
            filters, kernel_size, strides = paras[:3]
            padding = paras[3] if len(paras) > 3 else 0
            if same_padding:  # overwrite paddings
                kernel_size = common.tuplify2d(kernel_size)
                padding = ((kernel_size[0] - 1) // 2,
                           (kernel_size[1] - 1) // 2)
            act = activation
            if i == len(transconv_layer_params) - 1:
                act = output_activation
            self._transconv_layers.append(
                layers.ConvTranspose2D(in_channels,
                                       filters,
                                       kernel_size,
                                       activation=act,
                                       kernel_initializer=kernel_initializer,
                                       strides=strides,
                                       padding=padding))
            in_channels = filters
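Plugging numbers into the output-size formula from the docstring: with H1=8, HF=4, strides=2, P=1 and OP=0, each transposed-conv layer doubles the spatial size. A hedged construction sketch (import path assumed):

    from alf.networks import ImageDecodingNetwork

    # H = (H1 - 1) * strides + HF - 2 * P + OP
    #   = (8 - 1) * 2 + 4 - 2 * 1 + 0 = 16, and likewise 16 -> 32.
    decoder = ImageDecodingNetwork(
        input_size=100,
        # (num_filters, kernel_size, strides, padding) per layer
        transconv_layer_params=((32, 4, 2, 1), (3, 4, 2, 1)),
        start_decoding_size=8,
        start_decoding_channels=64)
    # Decodes a 100-dim latent vector into a 3-channel 32x32 image, squashed
    # by the default output_activation (torch.tanh).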
Example #6
    def __init__(self,
                 input_tensor_spec: TensorSpec,
                 action_spec: BoundedTensorSpec,
                 input_preprocessors=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 activation=torch.relu_,
                 squashing_func=torch.tanh,
                 kernel_initializer=None,
                 name="ActorNetwork"):
        """Creates an instance of ``ActorNetwork``, which maps the inputs to
        actions (single or nested) through a sequence of deterministic layers.

        Args:
            input_tensor_spec (TensorSpec): the tensor spec of the input.
            action_spec (BoundedTensorSpec): the tensor spec of the action.
            input_preprocessors (nested InputPreprocessor): a nest of
                ``InputPreprocessor``, each of which will be applied to the
                corresponding input. If not None, then it must
                have the same structure as ``input_tensor_spec`` (after reshaping).
                If any element is None, then it will be treated as ``math_ops.identity``.
                This arg is helpful if you want to have separate preprocessings
                for different inputs by configuring a gin file without changing
                the code. For example, embedding a discrete input before concatenating
                it to another continuous vector.
            preprocessing_combiner (NestCombiner): preprocessing called on
                complex inputs. Note that this combiner must also accept
                ``input_tensor_spec`` as the input to compute the processed
                tensor spec. For example, see ``alf.nest.utils.NestConcat``. This
                arg is helpful if you want to combine inputs by configuring a
                gin file without changing the code.
            conv_layer_params (tuple[tuple]): a tuple of tuples where each
                tuple takes a format ``(filters, kernel_size, strides, padding)``,
                where ``padding`` is optional.
            fc_layer_params (tuple[int]): a tuple of integers representing hidden
                FC layer sizes.
            activation (nn.functional): activation used for hidden layers. The
                last layer will not be activated.
            squashing_func (Callable): the activation function used to squash
                the output to the range :math:`(-1, 1)`. Defaults to ``torch.tanh``.
            kernel_initializer (Callable): initializer for all the layers but
                the last layer. If none is provided a ``variance_scaling_initializer``
                with uniform distribution will be used.
            name (str): name of the network
        """
        super(ActorNetwork, self).__init__(input_tensor_spec,
                                           input_preprocessors,
                                           preprocessing_combiner,
                                           name=name)

        if kernel_initializer is None:
            kernel_initializer = functools.partial(variance_scaling_init,
                                                   gain=math.sqrt(1.0 / 3),
                                                   mode='fan_in',
                                                   distribution='uniform')

        self._action_spec = action_spec
        flat_action_spec = nest.flatten(action_spec)
        self._flat_action_spec = flat_action_spec

        is_continuous = [
            single_action_spec.is_continuous
            for single_action_spec in flat_action_spec
        ]

        assert all(is_continuous), "only continuous action is supported"

        self._encoding_net = EncodingNetwork(
            input_tensor_spec=self._processed_input_tensor_spec,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            activation=activation,
            kernel_initializer=kernel_initializer,
            name=self.name + ".encoding_net")

        last_kernel_initializer = functools.partial(
            torch.nn.init.uniform_, a=-0.003, b=0.003)
        self._action_layers = nn.ModuleList()
        self._squashing_func = squashing_func
        for single_action_spec in flat_action_spec:
            self._action_layers.append(
                layers.FC(self._encoding_net.output_spec.shape[0],
                          single_action_spec.shape[0],
                          kernel_initializer=last_kernel_initializer))
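Finally, a minimal ``ActorNetwork`` construction sketch mirroring the docstring above; import paths and spec arguments are assumptions:

    from alf.networks import ActorNetwork
    from alf.tensor_specs import TensorSpec, BoundedTensorSpec

    obs_spec = TensorSpec((10, ), dtype='float32')
    # A 2-dim continuous action; only continuous specs pass the assert above.
    act_spec = BoundedTensorSpec((2, ), dtype='float32',
                                 minimum=-1.0, maximum=1.0)
    actor = ActorNetwork(obs_spec, act_spec, fc_layer_params=(64, 64))
    # The final FC output is squashed by squashing_func (torch.tanh by
    # default) into (-1, 1) at forward time.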