def __init__(self,
                 train_state_spec,
                 action_spec,
                 feature_spec,
                 hidden_size=256,
                 dynamics_network: Network = None,
                 name="DynamicsLearningAlgorithm"):
        """Create a DynamicsLearningAlgorithm.

        Args:
            train_state_spec (nested TensorSpec): spec of the state passed
                through ``train_step()``.
            action_spec (nested TensorSpec): spec of the action; nested
                action specs are not supported.
            feature_spec (nested TensorSpec): spec of the feature to predict;
                nested feature specs are not supported.
            hidden_size (int|tuple): size of hidden layer(s).
            dynamics_network (Network): network for predicting the change of
                the next feature based on the previous feature and action.
                It should accept input with spec
                (feature_spec, encoded_action_spec) and output a tensor with
                shape given by feature_spec. For discrete actions,
                encoded_action is a one-hot representation of the action; for
                continuous actions, it is the original action.
            name (str): name of this algorithm.
        """
        super().__init__(train_state_spec=train_state_spec, name=name)

        flat_action_spec = nest.flatten(action_spec)
        assert len(flat_action_spec) == 1, "doesn't support nested action_spec"

        flat_feature_spec = nest.flatten(feature_spec)
        assert len(
            flat_feature_spec) == 1, "doesn't support nested feature_spec"

        action_spec = flat_action_spec[0]

        if action_spec.is_discrete:
            self._num_actions = action_spec.maximum - action_spec.minimum + 1
        else:
            self._num_actions = action_spec.shape[-1]

        self._action_spec = action_spec
        self._feature_spec = feature_spec

        feature_dim = flat_feature_spec[0].shape[-1]

        if isinstance(hidden_size, int):
            hidden_size = (hidden_size, )

        if dynamics_network is None:
            encoded_action_spec = TensorSpec((self._num_actions, ),
                                             dtype=torch.float32)
            dynamics_network = EncodingNetwork(
                name="dynamics_net",
                input_tensor_spec=(feature_spec, encoded_action_spec),
                preprocessing_combiner=NestConcat(),
                fc_layer_params=hidden_size,
                last_layer_size=feature_dim,
                last_activation=math_ops.identity)

        self._dynamics_network = dynamics_network
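
# A minimal construction sketch (hypothetical specs, not from the original
# source); assumes alf's spec classes, e.g.:
#   from alf.tensor_specs import TensorSpec, BoundedTensorSpec
feature_spec = TensorSpec((32, ))
action_spec = BoundedTensorSpec((), 'int64', minimum=0, maximum=3)  # 4 actions
dynamics = DynamicsLearningAlgorithm(
    train_state_spec=(),
    action_spec=action_spec,
    feature_spec=feature_spec,
    hidden_size=(256, 256))  # two hidden layers instead of the scalar default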
Example #2
    def setUp(self):
        input_tensor_spec = TensorSpec((10, ))
        self._time_step = TimeStep(
            step_type=torch.tensor(StepType.MID, dtype=torch.int32),
            reward=0,
            discount=1,
            observation=input_tensor_spec.zeros(outer_dims=(1, )),
            prev_action=None,
            env_id=None)
        self._encoding_net = EncodingNetwork(
            input_tensor_spec=input_tensor_spec)
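
    # Follow-on sketch (hypothetical test method, not in the original source):
    # alf networks return an (output, state) pair; with no conv/fc layers the
    # EncodingNetwork acts as an identity, so the output keeps shape (1, 10).
    def test_forward(self):
        output, _ = self._encoding_net(self._time_step.observation)
        self.assertEqual(output.shape, (1, 10))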
Example #3
    def __init__(self,
                 z_dim,
                 input_tensor_spec: TensorSpec = None,
                 preprocess_network: EncodingNetwork = None,
                 z_prior_network: EncodingNetwork = None,
                 beta=1.0,
                 name="VariationalAutoEncoder"):
        """

        Args:
            z_dim (int): dimension of latent vector ``z``, namely, the dimension
                for generating ``z_mean`` and ``z_log_var``.
            input_tensor_spec (nested TensorSpec): the input spec which can be
                a nest. It must be provided if ``preprocess_network`` is None.
            preprocess_network (EncodingNetwork): an encoding network to
                preprocess input data before projecting it into (mean, log_var).
                If ``z_prior_network`` is None, this network must handle input
                with spec ``input_tensor_spec``. If ``z_prior_network`` is not
                None, this network must handle input with spec
                ``(z_prior_network.input_tensor_spec, input_tensor_spec, z_prior_network.output_spec)``.
                If this is None, an MLP of hidden sizes ``(z_dim*2, z_dim*2)``
                will be used.
            z_prior_network (EncodingNetwork): an encoding network that
                outputs concatenation of a prior mean and prior log var given
                the prior input. The network shouldn't activate its output.
            beta (float): the weight for the KL-divergence term.
            name (str):
        """
        super(VariationalAutoEncoder, self).__init__(name=name)

        self._preprocess_network = preprocess_network
        if preprocess_network is None:
            # according to appendix 2.4-2.5 in paper: https://arxiv.org/pdf/1803.10760.pdf
            if z_prior_network is None:
                preproc_input_spec = input_tensor_spec
            else:
                preproc_input_spec = (z_prior_network.input_tensor_spec,
                                      input_tensor_spec,
                                      z_prior_network.output_spec)
            self._preprocess_network = EncodingNetwork(
                input_tensor_spec=preproc_input_spec,
                preprocessing_combiner=alf.nest.utils.NestConcat(),
                fc_layer_params=(2 * z_dim, 2 * z_dim),
                activation=torch.tanh,
            )
        self._z_prior_network = z_prior_network

        size = self._preprocess_network.output_spec.shape[0]
        self._z_mean = FC(input_size=size, output_size=z_dim)
        self._z_log_var = FC(input_size=size, output_size=z_dim)
        self._beta = beta
        self._z_dim = z_dim
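
# A minimal usage sketch (hypothetical shapes, unconditional case): encode a
# batch and read back the sampled latent and the KL loss. ``train_step`` and
# ``info.loss`` follow the pattern used in the conditional-VAE test below.
input_spec = TensorSpec((8, ))
vae = VariationalAutoEncoder(z_dim=4, input_tensor_spec=input_spec)
alg_step = vae.train_step(input_spec.randn(outer_dims=(16, )))
z = alg_step.output        # sampled latent, shape [16, 4]
kld = alg_step.info.loss   # beta-weighted KL divergence term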
Example #4
def create_simple_dynamics_net(input_tensor_spec):
    action_spec = input_tensor_spec[1]
    preproc = None
    if not action_spec.is_continuous:
        preproc = nn.Sequential(
            alf.layers.OneHot(num_classes=get_unique_num_actions(action_spec)),
            alf.layers.Reshape([-1]))
    return EncodingNetwork(input_tensor_spec,
                           input_preprocessors=(None, preproc),
                           preprocessing_combiner=alf.nest.utils.NestConcat(),
                           fc_layer_params=(256, 256),
                           last_layer_size=input_tensor_spec[0].numel,
                           last_activation=alf.math.identity)
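
# Usage sketch (hypothetical specs): build the net for a 6-dim feature and 3
# discrete actions; the discrete action is one-hot encoded before concatenation.
feature_spec = TensorSpec((6, ))
action_spec = BoundedTensorSpec((), 'int64', minimum=0, maximum=2)
dynamics_net = create_simple_dynamics_net((feature_spec, action_spec))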
Example #5
    def __init__(self,
                 input_tensor_spec,
                 output_dim=None,
                 hidden_layers=(3, 3),
                 activation=torch.relu_,
                 net: Network = None,
                 use_relu_mlp=False,
                 use_bn=True,
                 optimizer=None,
                 name="CriticAlgorithm"):
        """Create a CriticAlgorithm.

        Args:
            input_tensor_spec (TensorSpec): spec of inputs.
            output_dim (int): dimension of the output; defaults to the input
                dimension.
            hidden_layers (tuple): sizes of hidden layers.
            activation (nn.functional): activation used for all critic layers.
            net (Network): network for predicting outputs from inputs.
                If None, a default one with ``hidden_layers`` will be created.
            use_relu_mlp (bool): whether to use ReluMLP as the default net
                constructor. Diagonals of the Jacobian can be explicitly
                computed for ReluMLP.
            use_bn (bool): whether to use batch norm for the critic layers.
            optimizer (torch.optim.Optimizer): (optional) optimizer for training.
            name (str): name of this CriticAlgorithm.
        """
        if optimizer is None:
            optimizer = alf.optimizers.Adam(lr=1e-3)
        super().__init__(train_state_spec=(), optimizer=optimizer, name=name)

        self._use_relu_mlp = use_relu_mlp
        self._output_dim = output_dim
        if output_dim is None:
            self._output_dim = input_tensor_spec.shape[0]
        if net is None:

            if use_relu_mlp:
                net = ReluMLP(input_tensor_spec=input_tensor_spec,
                              hidden_layers=hidden_layers,
                              activation=activation)
            else:
                net = EncodingNetwork(input_tensor_spec=input_tensor_spec,
                                      fc_layer_params=hidden_layers,
                                      use_fc_bn=use_bn,
                                      activation=activation,
                                      last_layer_size=self._output_dim,
                                      last_activation=math_ops.identity,
                                      last_use_fc_bn=use_bn,
                                      name='Critic')
        self._net = net
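
# Construction sketch (hypothetical spec): a critic over 4-dim inputs using the
# default EncodingNetwork; output_dim falls back to the input dimension.
critic = CriticAlgorithm(TensorSpec((4, )), hidden_layers=(64, 64))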
Example #6
    def test_non_rnn(self):
        input_spec = TensorSpec((100, ), torch.float32)
        embedding = input_spec.zeros(outer_dims=(6, ))

        network = EncodingNetwork(input_tensor_spec=input_spec,
                                  fc_layer_params=(30, 40, 50),
                                  activation=torch.tanh)
        replicas = 4
        num_layers = 3

        pnet = NaiveParallelNetwork(network, replicas)

        self.assertEqual(len(list(pnet.parameters())),
                         num_layers * 2 * replicas)

        output, _ = pnet(embedding)
        self.assertEqual(output.shape, (6, replicas, 50))
        self.assertEqual(pnet.output_spec.shape, (replicas, 50))
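
# Why the assertion above expects 24 parameters: the wrapped EncodingNetwork
# has num_layers = 3 FC layers, each contributing a weight and a bias tensor,
# and NaiveParallelNetwork keeps an independent copy per replica:
#   3 layers * 2 tensors * 4 replicas = 24 parameter tensors.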
Example #7
    def __init__(self,
                 skill_spec,
                 encoding_net: EncodingNetwork,
                 reward_adapt_speed=8.0,
                 observation_spec=None,
                 hidden_size=(),
                 hidden_activation=torch.relu_,
                 name="DIAYNAlgorithm"):
        """Create a DIAYNAlgorithm.

        Args:
            skill_spec (TensorSpec): supports both discrete and continuous skills.
                In the discrete case, the algorithm will predict 1-of-K skills
                using the cross entropy loss; in the continuous case, the
                algorithm will predict the skill vector itself using the mean
                square error loss.
            encoding_net (EncodingNetwork): network for encoding observation into
                a latent feature.
            reward_adapt_speed (float): how fast to adapt the reward normalizer.
                Roughly speaking, the statistics for the normalization are
                calculated mostly based on the most recent ``T/speed`` samples,
                where ``T`` is the total number of samples.
            observation_spec (TensorSpec): if not None, this spec is used by an
                observation normalizer to normalize incoming observations.
                In some cases, the normalized observation can be easier for
                training the discriminator.
            hidden_size (tuple[int]): a tuple of hidden layer sizes used by the
                discriminator.
            hidden_activation (torch.nn.functional): activation for the hidden
                layers.
            name (str): module's name
        """
        assert isinstance(skill_spec, TensorSpec)

        self._skill_spec = skill_spec
        if skill_spec.is_discrete:
            assert isinstance(skill_spec, BoundedTensorSpec)
            skill_dim = skill_spec.maximum - skill_spec.minimum + 1
        else:
            assert len(
                skill_spec.shape) == 1, "Only 1D skill vector is supported"
            skill_dim = skill_spec.shape[0]

        super().__init__(
            train_state_spec=TensorSpec((skill_dim, )),
            predict_state_spec=(),  # won't be needed for predict_step
            name=name)

        self._encoding_net = encoding_net

        self._discriminator_net = EncodingNetwork(
            input_tensor_spec=encoding_net.output_spec,
            fc_layer_params=hidden_size,
            activation=hidden_activation,
            last_layer_size=skill_dim,
            last_activation=math_ops.identity)

        self._reward_normalizer = ScalarAdaptiveNormalizer(
            speed=reward_adapt_speed)

        self._observation_normalizer = None
        if observation_spec is not None:
            self._observation_normalizer = AdaptiveNormalizer(
                tensor_spec=observation_spec)
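
# Construction sketch (hypothetical specs): a 5-skill discrete DIAYN
# discriminator on top of a simple observation encoder.
obs_spec = TensorSpec((12, ))
skill_spec = BoundedTensorSpec((), 'int64', minimum=0, maximum=4)
diayn = DIAYNAlgorithm(
    skill_spec=skill_spec,
    encoding_net=EncodingNetwork(
        input_tensor_spec=obs_spec, fc_layer_params=(64, )),
    observation_spec=obs_spec)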
Example #8
def create_simple_encoding_net(observation_spec):
    return EncodingNetwork(
        input_tensor_spec=observation_spec, fc_layer_params=(256, 256))
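
# Usage sketch (hypothetical 8-dim observation spec): the returned network maps
# observations to a 256-dim embedding and returns an (output, state) pair.
net = create_simple_encoding_net(TensorSpec((8, )))
embedding, _ = net(TensorSpec((8, )).zeros(outer_dims=(2, )))  # shape [2, 256]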
Example #9
    def __init__(self,
                 x_spec,
                 y_spec,
                 model=None,
                 fc_layers=(256, ),
                 sampler='buffer',
                 buffer_size=65536,
                 optimizer: torch.optim.Optimizer = None,
                 estimator_type='DV',
                 averager: EMAverager = None,
                 name="MIEstimator"):
        """

        Args:
            x_spec (nested TensorSpec): spec of ``x``
            y_spec (nested TensorSpec): spec of ``y``
            model (Network): can be called as ``model([x, y])`` and return a Tensor
                with ``shape=[batch_size, 1]``. If None, a default MLP with
                ``fc_layers`` will be created.
            fc_layers (tuple[int]): size of hidden layers. Only used if model is
                None.
            sampler (str): type of sampler used to get samples from marginal
                distribution, should be one of ``['buffer', 'double_buffer',
                'shuffle', 'shift']``.
            buffer_size (int): capacity of the buffer for storing y for the
                'buffer' and 'double_buffer' samplers.
            optimizer (torch.optim.Optimizer): optimizer for training.
            estimator_type (str): one of 'ML', 'DV', 'KLD' or 'JSD'.
            averager (EMAverager): averager used to maintain a moving average
                of :math:`\exp(T)`. Only used for the 'DV' estimator. If None,
                a ScalarAdaptiveAverager will be created.
            name (str): name of this estimator
        """
        assert estimator_type in ['ML', 'DV', 'KLD', 'JSD'
                                  ], "Wrong estimator_type %s" % estimator_type
        super().__init__(train_state_spec=(), optimizer=optimizer, name=name)
        self._x_spec = x_spec
        self._y_spec = y_spec
        if model is None:
            if estimator_type == 'ML':
                model = EncodingNetwork(
                    name="MIEstimator",
                    input_tensor_spec=x_spec,
                    fc_layer_params=fc_layers,
                    preprocessing_combiner=NestConcat(dim=-1))
            else:
                model = EncodingNetwork(
                    name="MIEstimator",
                    input_tensor_spec=[x_spec, y_spec],
                    preprocessing_combiner=NestConcat(dim=-1),
                    fc_layer_params=fc_layers,
                    last_layer_size=1,
                    last_activation=math_ops.identity)
        self._model = model
        self._type = estimator_type
        if sampler == 'buffer':
            self._y_buffer = DataBuffer(y_spec, capacity=buffer_size)
            self._sampler = self._buffer_sampler
        elif sampler == 'double_buffer':
            self._x_buffer = DataBuffer(x_spec, capacity=buffer_size)
            self._y_buffer = DataBuffer(y_spec, capacity=buffer_size)
            self._sampler = self._double_buffer_sampler
        elif sampler == 'shuffle':
            self._sampler = self._shuffle_sampler
        elif sampler == 'shift':
            self._sampler = self._shift_sampler
        else:
            raise ValueError("Unsupported sampler: %s" % sampler)

        if estimator_type == 'DV':
            if averager is None:
                averager = ScalarAdaptiveAverager()
            self._mean_averager = averager
        if estimator_type == 'ML':
            assert isinstance(
                y_spec,
                alf.TensorSpec), ("Currently, 'ML' does "
                                  "not support nested y_spec: %s" % y_spec)
            assert y_spec.is_continuous, ("Currently, 'ML' does "
                                          "not support discrete y_spec: %s" %
                                          y_spec)
            hidden_size = self._model.output_spec.shape[-1]
            self._delta_loc_layer = alf.layers.FC(
                hidden_size,
                y_spec.shape[-1],
                kernel_initializer=torch.nn.init.zeros_,
                bias_init_value=0.0)
            self._delta_scale_layer = alf.layers.FC(
                hidden_size,
                y_spec.shape[-1],
                kernel_initializer=torch.nn.init.zeros_,
                bias_init_value=math.log(math.e - 1))
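
# Construction sketch (hypothetical specs): a JSD estimator of the mutual
# information between two 4-dim variables, using the default MLP model.
mi_estimator = MIEstimator(
    x_spec=TensorSpec((4, )),
    y_spec=TensorSpec((4, )),
    sampler='buffer',
    estimator_type='JSD')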
Example #10
    def __init__(self,
                 input_tensor_spec: TensorSpec,
                 action_spec: BoundedTensorSpec,
                 input_preprocessors=None,
                 preprocessing_combiner=None,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 activation=torch.relu_,
                 kernel_initializer=None,
                 name="QNetwork"):
        """Creates an instance of ``QNetwork`` for estimating action-value of
        discrete actions. The action-value is defined as the expected return
        starting from the given input observation and taking the given action.
        It takes observation as input and outputs an action-value tensor with
        the shape of ``[batch_size, num_of_actions]``.

        Args:
            input_tensor_spec (TensorSpec): the tensor spec of the input
            action_spec (BoundedTensorSpec): the tensor spec of the action
            input_preprocessors (nested InputPreprocessor): a nest of
                ``InputPreprocessor``, each of which will be applied to the
                corresponding input. If not None, then it must have the same
                structure as ``input_tensor_spec`` (after reshaping). If any
                element is None, it will be treated as ``math_ops.identity``.
                This arg is helpful if you want to have separate preprocessings
                for different inputs by configuring a gin file without changing
                the code. For example, embedding a discrete input before concatenating
                it to another continuous vector.
            preprocessing_combiner (NestCombiner): preprocessing called on
                complex inputs. Note that this combiner must also accept
                ``input_tensor_spec`` as the input to compute the processed
                tensor spec. For example, see ``alf.nest.utils.NestConcat``. This
                arg is helpful if you want to combine inputs by configuring a
                gin file without changing the code.
            conv_layer_params (tuple[tuple]): a tuple of tuples where each
                tuple takes a format ``(filters, kernel_size, strides, padding)``,
                where ``padding`` is optional.
            fc_layer_params (tuple[int]): a tuple of integers representing hidden
                FC layer sizes.
            activation (nn.functional): activation used for hidden layers. The
                last layer will not be activated.
            kernel_initializer (Callable): initializer for all the layers but
                the last layer. If none is provided, a default
                ``variance_scaling_initializer`` will be used.
        """
        super(QNetwork, self).__init__(input_tensor_spec, name=name)

        assert len(nest.flatten(action_spec)) == 1, (
            "Currently only support a single discrete action! Use "
            "CriticNetwork instead for multiple actions.")

        num_actions = action_spec.maximum - action_spec.minimum + 1
        self._output_spec = TensorSpec((num_actions, ))

        self._encoding_net = EncodingNetwork(
            input_tensor_spec=input_tensor_spec,
            input_preprocessors=input_preprocessors,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            activation=activation,
            kernel_initializer=kernel_initializer)

        last_kernel_initializer = functools.partial(
            torch.nn.init.uniform_, a=-0.003, b=0.003)

        self._final_layer = layers.FC(
            self._encoding_net.output_spec.shape[0],
            num_actions,
            activation=math_ops.identity,
            kernel_initializer=last_kernel_initializer,
            bias_init_value=-0.2)
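
# Usage sketch (hypothetical specs): Q-values for 4 discrete actions from an
# 8-dim observation, assuming the usual (output, state) calling convention.
obs_spec = TensorSpec((8, ))
act_spec = BoundedTensorSpec((), 'int64', minimum=0, maximum=3)
q_net = QNetwork(obs_spec, act_spec, fc_layer_params=(100, ))
q_values, _ = q_net(obs_spec.zeros(outer_dims=(32, )))  # shape [32, 4]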
Example #11
    def __init__(self,
                 input_tensor_spec,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 activation=torch.relu_,
                 last_layer_param=None,
                 last_activation=None,
                 noise_dim=32,
                 hidden_layers=(64, 64),
                 use_fc_bn=False,
                 num_particles=10,
                 entropy_regularization=1.,
                 critic_optimizer=None,
                 critic_hidden_layers=(100, 100),
                 function_vi=False,
                 function_bs=None,
                 function_extra_bs_ratio=0.1,
                 function_extra_bs_sampler='uniform',
                 function_extra_bs_std=1.,
                 loss_type="classification",
                 voting="soft",
                 par_vi="svgd",
                 optimizer=None,
                 logging_network=False,
                 logging_training=False,
                 logging_evaluate=False,
                 config: TrainerConfig = None,
                 name="HyperNetwork"):
        """
        Args:
            Args for the generated parametric network
            ====================================================================
            input_tensor_spec (nested TensorSpec): the (nested) tensor spec of
                the input. If nested, then ``preprocessing_combiner`` must not be
                None.
            conv_layer_params (tuple[tuple]): a tuple of tuples where each
                tuple takes a format 
                ``(filters, kernel_size, strides, padding, pooling_kernel)``,
                where ``padding`` and ``pooling_kernel`` are optional.
            fc_layer_params (tuple[tuple]): a tuple of tuples where each tuple
                takes a format ``(FC layer sizes, use_bias)``, where
                ``use_bias`` is optional.
            activation (nn.functional): activation used for all the layers but
                the last layer.
            last_layer_param (tuple): an optional tuple of the format
                ``(size, use_bias)``, where ``use_bias`` is optional;
                it appends an additional layer at the very end.
                Note that if ``last_activation`` is specified,
                ``last_layer_param`` has to be specified explicitly.
            last_activation (nn.functional): activation function of the
                additional layer specified by ``last_layer_param``. Note that if
                ``last_layer_param`` is not None, ``last_activation`` has to be
                specified explicitly.

            Args for the generator
            ====================================================================
            noise_dim (int): dimension of noise
            hidden_layers (tuple): size of hidden layers.
            use_fc_bn (bool): whether to use batch normalization for fc layers.
            num_particles (int): number of sampling particles
            entropy_regularization (float): weight of entropy regularization

            Args for the critic (used when par_vi is ``minmax``)
            ====================================================================
            critic_optimizer (torch.optim.Optimizer): the optimizer for training critic.
            critic_hidden_layers (tuple): sizes of critic hidden layers.

            Args for function_vi
            ====================================================================
            function_vi (bool): whether to use function-value based par_vi;
                currently supported by [``svgd2``, ``svgd3``, ``gfsf``].
            function_bs (int): mini-batch size for par_vi training.
                Needed for critic initialization when function_vi is True.
            function_extra_bs_ratio (float): ratio of extra sampled batch size
                w.r.t. the function_bs.
            function_extra_bs_sampler (str): type of sampling method for extra
                training batch, types are [``uniform``, ``normal``].
            function_extra_bs_std (float): std of the normal distribution for
                sampling extra training batch when using normal sampler.

            Args for training and testing
            ====================================================================
            loss_type (str): loglikelihood type for the generated functions,
                types are [``classification``, ``regression``]
            voting (str): types of voting results from sampled functions,
                types are [``soft``, ``hard``]
            par_vi (str): types of particle-based methods for variational inference,
                types are [``svgd``, ``svgd2``, ``svgd3``, ``gfsf``, ``minmax``].
                * svgd: empirical expectation of SVGD is evaluated by a single
                    resampled particle. The main benefit of this choice is that
                    it supports the conditional case, while all other options
                    do not.
                * svgd2: empirical expectation of SVGD is evaluated by splitting
                    half of the sampled batch. It is a trade-off between
                    computational efficiency and convergence speed.
                * svgd3: empirical expectation of SVGD is evaluated by
                    resampled particles of the same batch size. It has better
                    convergence but involves resampling, so it is less
                    computationally efficient than svgd2.
                * gfsf: Wasserstein gradient flow with smoothed functions. It
                    involves a kernel matrix inversion, so it is computationally
                    the most expensive, but in some cases the convergence seems
                    faster than the svgd approaches.
                * minmax: Fisher Neural Sampler; the optimal descent direction
                    of the Stein discrepancy is solved by an inner optimization
                    procedure in the space of L2 neural networks.
            optimizer (torch.optim.Optimizer): The optimizer for training generator.
            logging_network (bool): whether to log the architectures of the networks.
            logging_training (bool): whether to log loss and accuracy during training.
            logging_evaluate (bool): whether to log loss and accuracy during evaluation.
            config (TrainerConfig): configuration for training
            name (str):
        """
        super().__init__(train_state_spec=(), optimizer=optimizer, name=name)

        param_net = ParamNetwork(input_tensor_spec=input_tensor_spec,
                                 conv_layer_params=conv_layer_params,
                                 fc_layer_params=fc_layer_params,
                                 activation=activation,
                                 last_layer_param=last_layer_param,
                                 last_activation=last_activation)

        gen_output_dim = param_net.param_length
        noise_spec = TensorSpec(shape=(noise_dim, ))
        net = EncodingNetwork(noise_spec,
                              fc_layer_params=hidden_layers,
                              use_fc_bn=use_fc_bn,
                              last_layer_size=gen_output_dim,
                              last_activation=math_ops.identity,
                              name="Generator")

        if logging_network:
            logging.info("Generated network")
            logging.info("-" * 68)
            logging.info(param_net)

            logging.info("Generator network")
            logging.info("-" * 68)
            logging.info(net)

        if par_vi == 'svgd':
            par_vi = 'svgd3'

        if function_vi:
            assert par_vi in ('svgd2', 'svgd3', 'gfsf'), (
                "function_vi is not supported for par_vi method: %s" % par_vi)
            assert function_bs is not None, (
                "Need to specify batch_size of function outputs.")
            assert function_extra_bs_sampler in ('uniform', 'normal'), (
                "Unsupported sampling type %s for extra training batch" %
                (function_extra_bs_sampler))
            self._function_extra_bs = math.ceil(function_bs *
                                                function_extra_bs_ratio)
            self._function_extra_bs_sampler = function_extra_bs_sampler
            self._function_extra_bs_std = function_extra_bs_std
            critic_input_dim = (function_bs +
                                self._function_extra_bs) * last_layer_param[0]
        else:
            critic_input_dim = gen_output_dim

        self._generator = Generator(
            gen_output_dim,
            noise_dim=noise_dim,
            net=net,
            entropy_regularization=entropy_regularization,
            par_vi=par_vi,
            critic_input_dim=critic_input_dim,
            critic_hidden_layers=critic_hidden_layers,
            optimizer=None,
            critic_optimizer=critic_optimizer,
            name=name)

        self._param_net = param_net
        self._num_particles = num_particles
        self._entropy_regularization = entropy_regularization
        self._train_loader = None
        self._test_loader = None
        self._use_fc_bn = use_fc_bn
        self._loss_type = loss_type
        self._function_vi = function_vi
        self._logging_training = logging_training
        self._logging_evaluate = logging_evaluate
        self._config = config
        assert (voting in ['soft',
                           'hard']), ('voting only supports "soft" and "hard"')
        self._voting = voting
        if loss_type == 'classification':
            self._loss_func = classification_loss
            self._vote = self._classification_vote
        elif loss_type == 'regression':
            self._loss_func = regression_loss
            self._vote = self._regression_vote
        else:
            raise ValueError("Unsupported loss_type: %s" % loss_type)
Example #12
    def __init__(self,
                 input_tensor_spec,
                 conv_layer_params=None,
                 fc_layer_params=None,
                 activation=torch.relu_,
                 last_layer_param=None,
                 last_activation=None,
                 noise_dim=32,
                 hidden_layers=(64, 64),
                 use_fc_bn=False,
                 num_particles=10,
                 entropy_regularization=1.,
                 loss_type="classification",
                 voting="soft",
                 par_vi="svgd",
                 optimizer=None,
                 logging_network=False,
                 logging_training=False,
                 logging_evaluate=False,
                 config: TrainerConfig = None,
                 name="HyperNetwork"):
        """
        Args:
            Args for the generated parametric network
            ====================================================================
            input_tensor_spec (nested TensorSpec): the (nested) tensor spec of
                the input. If nested, then ``preprocessing_combiner`` must not be
                None.
            conv_layer_params (tuple[tuple]): a tuple of tuples where each
                tuple takes a format 
                ``(filters, kernel_size, strides, padding, pooling_kernel)``,
                where ``padding`` and ``pooling_kernel`` are optional.
            fc_layer_params (tuple[tuple]): a tuple of tuples where each tuple
                takes a format ``(FC layer sizes, use_bias)``, where
                ``use_bias`` is optional.
            activation (nn.functional): activation used for all the layers but
                the last layer.
            last_layer_param (tuple): an optional tuple of the format
                ``(size, use_bias)``, where ``use_bias`` is optional;
                it appends an additional layer at the very end.
                Note that if ``last_activation`` is specified,
                ``last_layer_param`` has to be specified explicitly.
            last_activation (nn.functional): activation function of the
                additional layer specified by ``last_layer_param``. Note that if
                ``last_layer_param`` is not None, ``last_activation`` has to be
                specified explicitly.

            Args for the generator
            ====================================================================
            noise_dim (int): dimension of noise
            hidden_layers (tuple): size of hidden layers.
            use_fc_bn (bool): whether to use batch normalization for fc layers.
            num_particles (int): number of sampling particles
            entropy_regularization (float): weight of entropy regularization

            Args for training and testing
            ====================================================================
            loss_type (str): loglikelihood type for the generated functions,
                types are [``classification``, ``regression``]
            voting (str): types of voting results from sampled functions,
                types are [``soft``, ``hard``]
            par_vi (str): types of particle-based methods for variational inference,
                types are [``svgd``, ``svgd2``, ``svgd3``, ``gfsf``]
            optimizer (torch.optim.Optimizer): The optimizer for training.
            logging_network (bool): whether to log the architectures of the networks.
            logging_training (bool): whether to log loss and accuracy during training.
            logging_evaluate (bool): whether to log loss and accuracy during evaluation.
            config (TrainerConfig): configuration for training
            name (str):
        """
        super().__init__(train_state_spec=(), optimizer=optimizer, name=name)

        param_net = ParamNetwork(input_tensor_spec=input_tensor_spec,
                                 conv_layer_params=conv_layer_params,
                                 fc_layer_params=fc_layer_params,
                                 activation=activation,
                                 last_layer_param=last_layer_param,
                                 last_activation=last_activation)

        gen_output_dim = param_net.param_length
        noise_spec = TensorSpec(shape=(noise_dim, ))
        net = EncodingNetwork(noise_spec,
                              fc_layer_params=hidden_layers,
                              use_fc_bn=use_fc_bn,
                              last_layer_size=gen_output_dim,
                              last_activation=math_ops.identity,
                              name="Generator")

        if logging_network:
            logging.info("Generated network")
            logging.info("-" * 68)
            logging.info(param_net)

            logging.info("Generator network")
            logging.info("-" * 68)
            logging.info(net)

        if par_vi == 'svgd':
            par_vi = 'svgd3'

        self._generator = Generator(
            gen_output_dim,
            noise_dim=noise_dim,
            net=net,
            entropy_regularization=entropy_regularization,
            par_vi=par_vi,
            optimizer=None,
            name=name)

        self._param_net = param_net
        self._num_particles = num_particles
        self._entropy_regularization = entropy_regularization
        self._train_loader = None
        self._test_loader = None
        self._use_fc_bn = use_fc_bn
        self._loss_type = loss_type
        self._logging_training = logging_training
        self._logging_evaluate = logging_evaluate
        self._config = config
        assert (voting in ['soft',
                           'hard']), ('voting only supports "soft" and "hard"')
        self._voting = voting
        if loss_type == 'classification':
            self._loss_func = classification_loss
            self._vote = self._classification_vote
        elif loss_type == 'regression':
            self._loss_func = regression_loss
            self._vote = self._regression_vote
        else:
            raise ValueError("Unsupported loss_type: %s" % loss_type)
Example #13
    def __init__(self,
                 output_dim,
                 noise_dim=32,
                 input_tensor_spec=None,
                 hidden_layers=(256, ),
                 net: Network = None,
                 net_moving_average_rate=None,
                 entropy_regularization=0.,
                 mi_weight=None,
                 mi_estimator_cls=MIEstimator,
                 par_vi="gfsf",
                 optimizer=None,
                 name="Generator"):
        r"""Create a Generator.

        Args:
            output_dim (int): dimension of output
            noise_dim (int): dimension of noise
            input_tensor_spec (nested TensorSpec): spec of inputs. If there is
                no inputs, this should be None.
            hidden_layers (tuple): size of hidden layers.
            net (Network): network for generating outputs from [noise, inputs]
                or noise (if inputs is None). If None, a default one with
                ``hidden_layers`` will be created.
            net_moving_average_rate (float): if provided, use a moving average
                version of net to do prediction. This has been shown to be
                effective for GAN training (arXiv:1907.02544, arXiv:1812.04948).
            entropy_regularization (float): weight of entropy regularization.
            mi_weight (float): weight of the mutual information loss; if None,
                mutual information maximization is disabled.
            mi_estimator_cls (type): the class of mutual information estimator
                for maximizing the mutual information between [noise, inputs]
                and [outputs, inputs].
            par_vi (str): ParVI methods, options are
                [``svgd``, ``svgd2``, ``svgd3``, ``gfsf``].
                * svgd: empirical expectation of SVGD is evaluated by a single
                    resampled particle. The main benefit of this choice is that
                    it supports the conditional case, while all other options
                    do not.
                * svgd2: empirical expectation of SVGD is evaluated by splitting
                    half of the sampled batch. It is a trade-off between
                    computational efficiency and convergence speed.
                * svgd3: empirical expectation of SVGD is evaluated by
                    resampled particles of the same batch size. It has better
                    convergence but involves resampling, so it is less
                    computationally efficient than svgd2.
                * gfsf: Wasserstein gradient flow with smoothed functions. It
                    involves a kernel matrix inversion, so it is computationally
                    the most expensive, but in some cases the convergence seems
                    faster than the svgd approaches.
            optimizer (torch.optim.Optimizer): (optional) optimizer for training
            name (str): name of this generator
        """
        super().__init__(train_state_spec=(), optimizer=optimizer, name=name)
        self._noise_dim = noise_dim
        self._entropy_regularization = entropy_regularization
        self._par_vi = par_vi
        if entropy_regularization == 0:
            self._grad_func = self._ml_grad
        else:
            if par_vi == 'gfsf':
                self._grad_func = self._gfsf_grad
            elif par_vi == 'svgd':
                self._grad_func = self._svgd_grad
            elif par_vi == 'svgd2':
                self._grad_func = self._svgd_grad2
            elif par_vi == 'svgd3':
                self._grad_func = self._svgd_grad3
            else:
                raise ValueError("Unsupported par_vi method: %s" % par_vi)

            self._kernel_width_averager = AdaptiveAverager(
                tensor_spec=TensorSpec(shape=()))

        noise_spec = TensorSpec(shape=(noise_dim, ))

        if net is None:
            net_input_spec = noise_spec
            if input_tensor_spec is not None:
                net_input_spec = [net_input_spec, input_tensor_spec]
            net = EncodingNetwork(input_tensor_spec=net_input_spec,
                                  fc_layer_params=hidden_layers,
                                  last_layer_size=output_dim,
                                  last_activation=math_ops.identity,
                                  name="Generator")

        self._mi_estimator = None
        self._input_tensor_spec = input_tensor_spec
        if mi_weight is not None:
            x_spec = noise_spec
            y_spec = TensorSpec((output_dim, ))
            if input_tensor_spec is not None:
                x_spec = [x_spec, input_tensor_spec]
            self._mi_estimator = mi_estimator_cls(x_spec,
                                                  y_spec,
                                                  sampler='shift')
            self._mi_weight = mi_weight
        self._net = net
        self._predict_net = None
        self._net_moving_average_rate = net_moving_average_rate
        if net_moving_average_rate:
            self._predict_net = net.copy(name="Generator_average")
            self._predict_net_updater = common.get_target_updater(
                self._net, self._predict_net, tau=net_moving_average_rate)
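
# Construction sketch: an unconditional generator producing 16-dim samples from
# 32-dim noise, trained with the svgd3 gradient estimator and a default MLP net.
generator = Generator(
    output_dim=16,
    noise_dim=32,
    entropy_regularization=1.0,
    par_vi='svgd3')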
Example #14
    def __init__(self,
                 action_spec,
                 encoders,
                 decoders,
                 num_read_keys=3,
                 lstm_size=(256, 256),
                 latent_dim=200,
                 memory_size=1350,
                 loss_weight=1.0,
                 name="mbp"):
        """
        Args:
            action_spec (nested BoundedTensorSpec): representing the actions.
            encoders (nested Network): the nest should match observation_spec
            decoders (nested Algorithm): the nest should match observation_spec
            num_read_keys (int): number of keys for reading memory.
            lstm_size (tuple[int]): sizes of the lstm layers for MBP and MBA.
            latent_dim (int): the dimension of the hidden representation of VAE.
            memory_size (int): number of memory slots.
            loss_weight (float): weight for the loss.
            name (str): name of the algorithm.
        """
        action_encoder = SimpleActionEncoder(action_spec)

        memory = MemoryWithUsage(
            latent_dim, memory_size, name=name + "/memory")

        rnn_input_size = (latent_dim + num_read_keys * latent_dim +
                          action_encoder.output_spec.shape[0])

        rnn = LSTMEncodingNetwork(
            input_tensor_spec=alf.TensorSpec((rnn_input_size, )),
            hidden_size=lstm_size,
            name=name + "/lstm")

        state_spec = MBPState(
            latent_vector=alf.TensorSpec((latent_dim, )),
            mem_readout=alf.TensorSpec((num_read_keys * latent_dim, )),
            rnn_state=rnn.state_spec,
            memory=memory.state_spec)

        super().__init__(train_state_spec=state_spec, name=name)

        self._encoders = encoders
        self._decoders = decoders
        self._action_encoder = action_encoder

        self._rnn = rnn
        self._memory = memory

        self._key_net = self._memory.create_keynet(rnn.output_spec,
                                                   num_read_keys)

        prior_network = EncodingNetwork(
            input_tensor_spec=(rnn.output_spec, state_spec.mem_readout),
            preprocessing_combiner=alf.nest.utils.NestConcat(),
            fc_layer_params=(2 * latent_dim, 2 * latent_dim),
            activation=torch.tanh,
            last_layer_size=2 * latent_dim,
            last_activation=math_ops.identity,
            name=name + "/prior_network")

        encoder_output_specs = alf.nest.map_structure(
            lambda encoder: encoder.output_spec, self._encoders)
        self._vae = VariationalAutoEncoder(
            latent_dim,
            input_tensor_spec=encoder_output_specs,
            z_prior_network=prior_network,
            name=name + "/vae")

        self._loss_weight = loss_weight
Example #15
    def __init__(self,
                 action_spec,
                 observation_spec=None,
                 hidden_size=256,
                 reward_adapt_speed=8.0,
                 encoding_net: EncodingNetwork = None,
                 forward_net: EncodingNetwork = None,
                 inverse_net: EncodingNetwork = None,
                 activation=torch.relu_,
                 optimizer=None,
                 name="ICMAlgorithm"):
        """Create an ICMAlgorithm.

        Args:
            action_spec (nested TensorSpec): agent's action spec; nested
                action specs are not supported.
            observation_spec (nested TensorSpec): agent's observation spec. If
                not None, then a normalizer will be used to normalize the
                observation.
            hidden_size (int or tuple[int]): size of hidden layer(s)
            reward_adapt_speed (float): how fast to adapt the reward normalizer.
                Roughly speaking, the statistics for the normalization are
                calculated mostly based on the most recent T/speed samples,
                where T is the total number of samples.
            encoding_net (Network): network for encoding observation into a
                latent feature. Its input is the same as the input of this
                algorithm.
            forward_net (Network): network for predicting the next feature
                based on the previous feature and action. It should accept
                input with spec [feature_spec, encoded_action_spec] and output
                a tensor with shape given by feature_spec. For discrete
                actions, encoded_action is a one-hot representation of the
                action; for continuous actions, it is the same as the original
                action.
            inverse_net (Network): network for predicting the previous action
                given the previous feature and the current feature. It should
                accept input with spec [feature_spec, feature_spec] and output
                a tensor of shape (num_actions,).
            activation (torch.nn.functional): activation used when constructing
                the default forward and inverse nets (i.e., when they are not
                provided).
            optimizer (torch.optim.Optimizer): The optimizer for training
            name (str):
        """
        if encoding_net is not None:
            feature_spec = encoding_net.output_spec
        else:
            feature_spec = observation_spec

        super(ICMAlgorithm, self).__init__(
            train_state_spec=feature_spec,
            predict_state_spec=(),
            optimizer=optimizer,
            name=name)

        flat_action_spec = alf.nest.flatten(action_spec)
        assert len(
            flat_action_spec) == 1, "ICM doesn't support nested action_spec"

        flat_feature_spec = alf.nest.flatten(feature_spec)
        assert len(
            flat_feature_spec) == 1, "ICM doesn't support nested feature_spec"

        action_spec = flat_action_spec[0]

        if action_spec.is_discrete:
            self._num_actions = int(action_spec.maximum - action_spec.minimum +
                                    1)
        else:
            self._num_actions = action_spec.shape[-1]

        self._action_spec = action_spec
        self._observation_normalizer = None
        if observation_spec is not None:
            self._observation_normalizer = AdaptiveNormalizer(
                tensor_spec=observation_spec)

        feature_dim = flat_feature_spec[0].shape[-1]

        self._encoding_net = encoding_net

        if isinstance(hidden_size, int):
            hidden_size = (hidden_size, )

        if forward_net is None:
            encoded_action_spec = TensorSpec((self._num_actions, ),
                                             dtype=torch.float32)
            forward_net = EncodingNetwork(
                name="forward_net",
                input_tensor_spec=[feature_spec, encoded_action_spec],
                preprocessing_combiner=NestConcat(),
                fc_layer_params=hidden_size,
                activation=activation,
                last_layer_size=feature_dim,
                last_activation=math_ops.identity)

        self._forward_net = forward_net

        if inverse_net is None:
            inverse_net = EncodingNetwork(
                name="inverse_net",
                input_tensor_spec=[feature_spec, feature_spec],
                preprocessing_combiner=NestConcat(),
                fc_layer_params=hidden_size,
                activation=activation,
                last_layer_size=self._num_actions,
                last_activation=math_ops.identity,
                last_kernel_initializer=torch.nn.init.zeros_)

        self._inverse_net = inverse_net

        self._reward_normalizer = ScalarAdaptiveNormalizer(
            speed=reward_adapt_speed)
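
# Construction sketch (hypothetical specs): ICM over raw 10-dim observations
# and 4 discrete actions, using the default forward and inverse nets.
obs_spec = TensorSpec((10, ))
act_spec = BoundedTensorSpec((), 'int64', minimum=0, maximum=3)
icm = ICMAlgorithm(action_spec=act_spec, observation_spec=obs_spec)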
Example #16
    def test_conditional_vae(self):
        """Test for a one-dimensional Gaussian, conditioned on a Bernoulli
        variable.
        """
        prior_input_spec = BoundedTensorSpec((), 'int64')

        z_prior_network = EncodingNetwork(
            TensorSpec(
                (prior_input_spec.maximum - prior_input_spec.minimum + 1, )),
            fc_layer_params=(10, ) * 2,
            last_layer_size=2 * self._latent_dim,
            last_activation=math_ops.identity)
        preprocess_network = EncodingNetwork(
            input_tensor_spec=(
                z_prior_network.input_tensor_spec,
                self._input_spec,
                z_prior_network.output_spec,
            ),
            preprocessing_combiner=NestConcat(),
            fc_layer_params=(10, ) * 2,
            last_layer_size=self._latent_dim,
            last_activation=math_ops.identity)

        encoder = vae.VariationalAutoEncoder(
            self._latent_dim,
            preprocess_network=preprocess_network,
            z_prior_network=z_prior_network)
        decoding_layers = FC(self._latent_dim, 1)

        optimizer = torch.optim.Adam(
            list(encoder.parameters()) + list(decoding_layers.parameters()),
            lr=0.1)

        x_train = self._input_spec.randn(outer_dims=(10000, ))
        y_train = x_train.clone()
        y_train[:5000] = y_train[:5000] + 1.0
        pr_train = torch.cat([
            prior_input_spec.zeros(outer_dims=(5000, )),
            prior_input_spec.ones(outer_dims=(5000, ))
        ],
                             dim=0)

        x_test = self._input_spec.randn(outer_dims=(100, ))
        y_test = x_test.clone()
        y_test[:50] = y_test[:50] + 1.0
        pr_test = torch.cat([
            prior_input_spec.zeros(outer_dims=(50, )),
            prior_input_spec.ones(outer_dims=(50, ))
        ],
                            dim=0)
        pr_test = torch.nn.functional.one_hot(
            pr_test,
            int(z_prior_network.input_tensor_spec.shape[0])).to(torch.float32)

        for _ in range(self._epochs):
            idx = torch.randperm(x_train.shape[0])
            x_train = x_train[idx]
            y_train = y_train[idx]
            pr_train = pr_train[idx]
            for i in range(0, x_train.shape[0], self._batch_size):
                optimizer.zero_grad()
                batch = x_train[i:i + self._batch_size]
                y_batch = y_train[i:i + self._batch_size]
                pr_batch = torch.nn.functional.one_hot(
                    pr_train[i:i + self._batch_size],
                    int(z_prior_network.input_tensor_spec.shape[0])).to(
                        torch.float32)
                alg_step = encoder.train_step([pr_batch, batch])
                outputs = decoding_layers(alg_step.output)
                loss = torch.mean(100 * self._loss_f(y_batch - outputs) +
                                  alg_step.info.loss)
                loss.backward()
                optimizer.step()

        y_hat_test = decoding_layers(
            encoder.train_step([pr_test, x_test]).output)
        reconstruction_loss = float(
            torch.mean(self._loss_f(y_test - y_hat_test)))
        print("reconstruction_loss:", reconstruction_loss)
        self.assertLess(reconstruction_loss, 0.05)