def __init__(self,
             train_state_spec,
             action_spec,
             feature_spec,
             hidden_size=256,
             dynamics_network: Network = None,
             name="DynamicsLearningAlgorithm"):
    """Create a DynamicsLearningAlgorithm.

    Args:
        train_state_spec (nested TensorSpec): state spec for training.
        action_spec (nested TensorSpec): agent's action spec.
        feature_spec (nested TensorSpec): spec of the feature to be predicted.
        hidden_size (int|tuple): size of hidden layer(s)
        dynamics_network (Network): network for predicting the change of the
            next feature based on the previous feature and action. It should
            accept input with spec of the format
            [feature_spec, encoded_action_spec] and output a tensor of the
            shape feature_spec. For the discrete action case, encoded_action
            is a one-hot representation of the action. For continuous action,
            encoded_action is the original action.
    """
    super().__init__(train_state_spec=train_state_spec, name=name)
    flat_action_spec = nest.flatten(action_spec)
    assert len(flat_action_spec) == 1, "doesn't support nested action_spec"
    flat_feature_spec = nest.flatten(feature_spec)
    assert len(flat_feature_spec) == 1, "doesn't support nested feature_spec"
    action_spec = flat_action_spec[0]
    if action_spec.is_discrete:
        self._num_actions = action_spec.maximum - action_spec.minimum + 1
    else:
        self._num_actions = action_spec.shape[-1]
    self._action_spec = action_spec
    self._feature_spec = feature_spec
    feature_dim = flat_feature_spec[0].shape[-1]
    if isinstance(hidden_size, int):
        hidden_size = (hidden_size, )
    if dynamics_network is None:
        encoded_action_spec = TensorSpec((self._num_actions, ),
                                         dtype=torch.float32)
        dynamics_network = EncodingNetwork(
            name="dynamics_net",
            input_tensor_spec=(feature_spec, encoded_action_spec),
            preprocessing_combiner=NestConcat(),
            fc_layer_params=hidden_size,
            last_layer_size=feature_dim,
            last_activation=math_ops.identity)
    self._dynamics_network = dynamics_network
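# A minimal sketch (with hypothetical specs) of the action-encoding convention
# described in the docstring above: discrete actions are one-hot encoded to
# match encoded_action_spec before being combined with the feature, while
# continuous actions would be passed through unchanged.
import torch
import torch.nn.functional as F

action_spec = BoundedTensorSpec((), 'int64', minimum=0, maximum=3)
num_actions = action_spec.maximum - action_spec.minimum + 1  # 4 actions
actions = torch.tensor([1, 3])
# shape [2, 4], matching encoded_action_spec = TensorSpec((num_actions, ))
encoded_actions = F.one_hot(actions, int(num_actions)).to(torch.float32)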
def setUp(self):
    input_tensor_spec = TensorSpec((10, ))
    self._time_step = TimeStep(
        step_type=torch.tensor(StepType.MID, dtype=torch.int32),
        reward=0,
        discount=1,
        observation=input_tensor_spec.zeros(outer_dims=(1, )),
        prev_action=None,
        env_id=None)
    self._encoding_net = EncodingNetwork(input_tensor_spec=input_tensor_spec)
def __init__(self,
             z_dim,
             input_tensor_spec: TensorSpec = None,
             preprocess_network: EncodingNetwork = None,
             z_prior_network: EncodingNetwork = None,
             beta=1.0,
             name="VariationalAutoEncoder"):
    """
    Args:
        z_dim (int): dimension of latent vector ``z``, namely, the dimension
            for generating ``z_mean`` and ``z_log_var``.
        input_tensor_spec (nested TensorSpec): the input spec which can be
            a nest. If ``preprocess_network`` is None, then it must be
            provided.
        preprocess_network (EncodingNetwork): an encoding network to
            preprocess input data before projecting it into (mean, log_var).
            If ``z_prior_network`` is None, this network must handle input
            with spec ``input_tensor_spec``. If ``z_prior_network`` is not
            None, this network must handle input with spec
            ``(z_prior_network.input_tensor_spec, input_tensor_spec,
            z_prior_network.output_spec)``. If this is None, an MLP of hidden
            sizes ``(z_dim*2, z_dim*2)`` will be used.
        z_prior_network (EncodingNetwork): an encoding network that outputs
            the concatenation of a prior mean and prior log var given the
            prior input. The network shouldn't activate its output.
        beta (float): the weight for KL-divergence
        name (str): name of this module
    """
    super(VariationalAutoEncoder, self).__init__(name=name)
    self._preprocess_network = preprocess_network
    if preprocess_network is None:
        # according to appendix 2.4-2.5 in paper:
        # https://arxiv.org/pdf/1803.10760.pdf
        if z_prior_network is None:
            preproc_input_spec = input_tensor_spec
        else:
            preproc_input_spec = (z_prior_network.input_tensor_spec,
                                  input_tensor_spec,
                                  z_prior_network.output_spec)
        self._preprocess_network = EncodingNetwork(
            input_tensor_spec=preproc_input_spec,
            preprocessing_combiner=alf.nest.utils.NestConcat(),
            fc_layer_params=(2 * z_dim, 2 * z_dim),
            activation=torch.tanh,
        )
    self._z_prior_network = z_prior_network
    size = self._preprocess_network.output_spec.shape[0]
    self._z_mean = FC(input_size=size, output_size=z_dim)
    self._z_log_var = FC(input_size=size, output_size=z_dim)
    self._beta = beta
    self._z_dim = z_dim
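# A minimal sketch of how (z_mean, z_log_var) are typically turned into a
# latent sample plus a KL penalty (the reparameterization trick). This
# illustrates the math behind the class; it is not its actual train_step code.
import torch

def reparameterize(z_mean, z_log_var, beta=1.0):
    # z = mean + std * eps, with eps ~ N(0, I)
    eps = torch.randn_like(z_mean)
    z = z_mean + torch.exp(0.5 * z_log_var) * eps
    # KL(N(mean, var) || N(0, I)), summed over latent dimensions
    kld = 0.5 * torch.sum(
        z_mean**2 + torch.exp(z_log_var) - 1.0 - z_log_var, dim=-1)
    return z, beta * kld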
def create_simple_dynamics_net(input_tensor_spec):
    action_spec = input_tensor_spec[1]
    preproc = None
    if not action_spec.is_continuous:
        preproc = nn.Sequential(
            alf.layers.OneHot(
                num_classes=get_unique_num_actions(action_spec)),
            alf.layers.Reshape([-1]))
    return EncodingNetwork(
        input_tensor_spec,
        input_preprocessors=(None, preproc),
        preprocessing_combiner=alf.nest.utils.NestConcat(),
        fc_layer_params=(256, 256),
        last_layer_size=input_tensor_spec[0].numel,
        last_activation=alf.math.identity)
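# A usage sketch with illustrative specs: the helper expects a tuple of
# (feature_spec, action_spec) and returns a network whose output has as many
# elements as the feature.
feature_spec = TensorSpec((8, ))
action_spec = BoundedTensorSpec((), 'int64', minimum=0, maximum=2)
dynamics_net = create_simple_dynamics_net((feature_spec, action_spec))
# dynamics_net((feature_batch, action_batch)) -> (prediction, state)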
def __init__(self,
             input_tensor_spec,
             output_dim=None,
             hidden_layers=(3, 3),
             activation=torch.relu_,
             net: Network = None,
             use_relu_mlp=False,
             use_bn=True,
             optimizer=None,
             name="CriticAlgorithm"):
    """Create a CriticAlgorithm.

    Args:
        input_tensor_spec (TensorSpec): spec of inputs.
        output_dim (int): dimension of output; the default value is input_dim.
        hidden_layers (tuple): sizes of hidden layers.
        activation (nn.functional): activation used for all critic layers.
        net (Network): network for predicting outputs from inputs. If None,
            a default one with hidden_layers will be created.
        use_relu_mlp (bool): whether to use ReluMLP as the default net
            constructor. Diagonals of the Jacobian can be explicitly computed
            for ReluMLP.
        use_bn (bool): whether to use batch norm for the critic layers.
        optimizer (torch.optim.Optimizer): (optional) optimizer for training.
        name (str): name of this CriticAlgorithm.
    """
    if optimizer is None:
        optimizer = alf.optimizers.Adam(lr=1e-3)
    super().__init__(train_state_spec=(), optimizer=optimizer, name=name)
    self._use_relu_mlp = use_relu_mlp
    self._output_dim = output_dim
    if output_dim is None:
        self._output_dim = input_tensor_spec.shape[0]
    if net is None:
        if use_relu_mlp:
            net = ReluMLP(
                input_tensor_spec=input_tensor_spec,
                hidden_layers=hidden_layers,
                activation=activation)
        else:
            net = EncodingNetwork(
                input_tensor_spec=input_tensor_spec,
                fc_layer_params=hidden_layers,
                use_fc_bn=use_bn,
                activation=activation,
                last_layer_size=self._output_dim,
                last_activation=math_ops.identity,
                last_use_fc_bn=use_bn,
                name='Critic')
    self._net = net
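# A construction sketch with illustrative arguments: a critic mapping a 4-dim
# input to a 4-dim output (output_dim defaults to the input dim) through two
# small hidden layers, without batch norm.
critic = CriticAlgorithm(
    input_tensor_spec=TensorSpec((4, )),
    hidden_layers=(32, 32),
    use_bn=False)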
def test_non_rnn(self):
    input_spec = TensorSpec((100, ), torch.float32)
    embedding = input_spec.zeros(outer_dims=(6, ))
    network = EncodingNetwork(
        input_tensor_spec=input_spec,
        fc_layer_params=(30, 40, 50),
        activation=torch.tanh)
    replicas = 4
    num_layers = 3
    pnet = NaiveParallelNetwork(network, replicas)
    # each FC layer contributes a weight and a bias per replica
    self.assertEqual(
        len(list(pnet.parameters())), num_layers * 2 * replicas)
    output, _ = pnet(embedding)
    self.assertEqual(output.shape, (6, replicas, 50))
    self.assertEqual(pnet.output_spec.shape, (replicas, 50))
def __init__(self,
             skill_spec,
             encoding_net: EncodingNetwork,
             reward_adapt_speed=8.0,
             observation_spec=None,
             hidden_size=(),
             hidden_activation=torch.relu_,
             name="DIAYNAlgorithm"):
    """Create a DIAYNAlgorithm.

    Args:
        skill_spec (TensorSpec): supports both discrete and continuous skills.
            In the discrete case, the algorithm will predict 1-of-K skills
            using the cross entropy loss; in the continuous case, the
            algorithm will predict the skill vector itself using the mean
            square error loss.
        encoding_net (EncodingNetwork): network for encoding observation into
            a latent feature.
        reward_adapt_speed (float): how fast to adapt the reward normalizer.
            Roughly speaking, the statistics for the normalization is
            calculated mostly based on the most recent ``T/speed`` samples,
            where ``T`` is the total number of samples.
        observation_spec (TensorSpec): If not None, this spec is to be used
            by an observation normalizer to normalize incoming observations.
            In some cases, the normalized observation can be easier for
            training the discriminator.
        hidden_size (tuple[int]): a tuple of hidden layer sizes used by the
            discriminator.
        hidden_activation (torch.nn.functional): activation for the hidden
            layers.
        name (str): module's name
    """
    assert isinstance(skill_spec, TensorSpec)
    self._skill_spec = skill_spec
    if skill_spec.is_discrete:
        assert isinstance(skill_spec, BoundedTensorSpec)
        skill_dim = skill_spec.maximum - skill_spec.minimum + 1
    else:
        assert len(
            skill_spec.shape) == 1, "Only 1D skill vector is supported"
        skill_dim = skill_spec.shape[0]
    super().__init__(
        train_state_spec=TensorSpec((skill_dim, )),
        predict_state_spec=(),  # won't be needed for predict_step
        name=name)
    self._encoding_net = encoding_net
    self._discriminator_net = EncodingNetwork(
        input_tensor_spec=encoding_net.output_spec,
        fc_layer_params=hidden_size,
        activation=hidden_activation,
        last_layer_size=skill_dim,
        last_activation=math_ops.identity)
    self._reward_normalizer = ScalarAdaptiveNormalizer(
        speed=reward_adapt_speed)
    self._observation_normalizer = None
    if observation_spec is not None:
        self._observation_normalizer = AdaptiveNormalizer(
            tensor_spec=observation_spec)
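# A sketch of the role the discriminator plays in DIAYN (arXiv:1802.06070):
# for a discrete skill z, the intrinsic reward is the discriminator's
# log-likelihood of the skill given the reached state, log q(z|s). The
# function and tensor names below are illustrative, not this class's code.
import torch.nn.functional as F

def diayn_intrinsic_reward(discriminator_logits, skill):
    # discriminator_logits: [batch, skill_dim]; skill: [batch] of int labels.
    # -cross_entropy == log q(z|s): higher when states reached under skill z
    # are distinguishable from states reached under other skills.
    return -F.cross_entropy(discriminator_logits, skill, reduction='none')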
def create_simple_encoding_net(observation_spec):
    return EncodingNetwork(
        input_tensor_spec=observation_spec, fc_layer_params=(256, 256))
def __init__(self,
             x_spec,
             y_spec,
             model=None,
             fc_layers=(256, ),
             sampler='buffer',
             buffer_size=65536,
             optimizer: torch.optim.Optimizer = None,
             estimator_type='DV',
             averager: EMAverager = None,
             name="MIEstimator"):
    """
    Args:
        x_spec (nested TensorSpec): spec of ``x``
        y_spec (nested TensorSpec): spec of ``y``
        model (Network): can be called as ``model([x, y])`` and return a
            Tensor with ``shape=[batch_size, 1]``. If None, a default MLP
            with ``fc_layers`` will be created.
        fc_layers (tuple[int]): sizes of hidden layers. Only used if model
            is None.
        sampler (str): type of sampler used to get samples from the marginal
            distribution, should be one of
            ``['buffer', 'double_buffer', 'shuffle', 'shift']``.
        buffer_size (int): capacity of the buffer for storing y for samplers
            'buffer' and 'double_buffer'.
        optimizer (torch.optim.Optimizer): optimizer
        estimator_type (str): one of 'ML', 'DV', 'KLD' or 'JSD'
        averager (EMAverager): averager used to maintain a moving average of
            :math:`exp(T)`. Only used for the 'DV' estimator. If None, a
            ScalarAdaptiveAverager will be created.
        name (str): name of this estimator
    """
    assert estimator_type in ['ML', 'DV', 'KLD', 'JSD'], (
        "Wrong estimator_type %s" % estimator_type)
    super().__init__(train_state_spec=(), optimizer=optimizer, name=name)
    self._x_spec = x_spec
    self._y_spec = y_spec
    if model is None:
        if estimator_type == 'ML':
            model = EncodingNetwork(
                name="MIEstimator",
                input_tensor_spec=x_spec,
                fc_layer_params=fc_layers,
                preprocessing_combiner=NestConcat(dim=-1))
        else:
            model = EncodingNetwork(
                name="MIEstimator",
                input_tensor_spec=[x_spec, y_spec],
                preprocessing_combiner=NestConcat(dim=-1),
                fc_layer_params=fc_layers,
                last_layer_size=1,
                last_activation=math_ops.identity)
    self._model = model
    self._type = estimator_type
    if sampler == 'buffer':
        self._y_buffer = DataBuffer(y_spec, capacity=buffer_size)
        self._sampler = self._buffer_sampler
    elif sampler == 'double_buffer':
        self._x_buffer = DataBuffer(x_spec, capacity=buffer_size)
        self._y_buffer = DataBuffer(y_spec, capacity=buffer_size)
        self._sampler = self._double_buffer_sampler
    elif sampler == 'shuffle':
        self._sampler = self._shuffle_sampler
    elif sampler == 'shift':
        self._sampler = self._shift_sampler
    else:
        raise TypeError("Wrong type for sampler %s" % sampler)
    if estimator_type == 'DV':
        if averager is None:
            averager = ScalarAdaptiveAverager()
        self._mean_averager = averager
    if estimator_type == 'ML':
        assert isinstance(
            y_spec, alf.TensorSpec), ("Currently, 'ML' does "
                                      "not support nested y_spec: %s" %
                                      y_spec)
        assert y_spec.is_continuous, ("Currently, 'ML' does "
                                      "not support discrete y_spec: %s" %
                                      y_spec)
        hidden_size = self._model.output_spec.shape[-1]
        self._delta_loc_layer = alf.layers.FC(
            hidden_size,
            y_spec.shape[-1],
            kernel_initializer=torch.nn.init.zeros_,
            bias_init_value=0.0)
        self._delta_scale_layer = alf.layers.FC(
            hidden_size,
            y_spec.shape[-1],
            kernel_initializer=torch.nn.init.zeros_,
            bias_init_value=math.log(math.e - 1))
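# A sketch of the Donsker-Varadhan (DV) lower bound that the 'DV' estimator
# type is named after: I(X; Y) >= E_{p(x,y)}[T(x,y)] - log E_{p(x)p(y)}[exp(T)],
# where T is the critic `model`. The tensor names below are illustrative.
import math
import torch

def dv_lower_bound(t_joint, t_marginal):
    # t_joint: critic scores on samples from the joint p(x, y)
    # t_marginal: critic scores on samples from the product of marginals
    # log E[exp(T)] computed stably as logsumexp(T) - log(N)
    return t_joint.mean() - (torch.logsumexp(t_marginal, dim=0) -
                             math.log(t_marginal.shape[0]))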
def __init__(self,
             input_tensor_spec: TensorSpec,
             action_spec: BoundedTensorSpec,
             input_preprocessors=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             fc_layer_params=None,
             activation=torch.relu_,
             kernel_initializer=None,
             name="QNetwork"):
    """Creates an instance of ``QNetwork`` for estimating action-value of
    discrete actions. The action-value is defined as the expected return
    starting from the given input observation and taking the given action.
    It takes observation as input and outputs an action-value tensor with
    the shape of ``[batch_size, num_of_actions]``.

    Args:
        input_tensor_spec (TensorSpec): the tensor spec of the input
        action_spec (TensorSpec): the tensor spec of the action
        input_preprocessors (nested InputPreprocessor): a nest of
            ``InputPreprocessor``, each of which will be applied to the
            corresponding input. If not None, then it must have the same
            structure with ``input_tensor_spec`` (after reshaping). If any
            element is None, then it will be treated as ``math_ops.identity``.
            This arg is helpful if you want to have separate preprocessings
            for different inputs by configuring a gin file without changing
            the code. For example, embedding a discrete input before
            concatenating it to another continuous vector.
        preprocessing_combiner (NestCombiner): preprocessing called on
            complex inputs. Note that this combiner must also accept
            ``input_tensor_spec`` as the input to compute the processed
            tensor spec. For example, see ``alf.nest.utils.NestConcat``. This
            arg is helpful if you want to combine inputs by configuring a
            gin file without changing the code.
        conv_layer_params (tuple[tuple]): a tuple of tuples where each tuple
            takes a format ``(filters, kernel_size, strides, padding)``,
            where ``padding`` is optional.
        fc_layer_params (tuple[int]): a tuple of integers representing
            hidden FC layer sizes.
        activation (nn.functional): activation used for hidden layers. The
            last layer will not be activated.
        kernel_initializer (Callable): initializer for all the layers but
            the last layer. If none is provided, a default
            ``variance_scaling_initializer`` will be used.
    """
    super(QNetwork, self).__init__(input_tensor_spec, name=name)
    assert len(nest.flatten(action_spec)) == 1, (
        "Currently only support a single discrete action! Use "
        "CriticNetwork instead for multiple actions.")
    num_actions = action_spec.maximum - action_spec.minimum + 1
    self._output_spec = TensorSpec((num_actions, ))
    self._encoding_net = EncodingNetwork(
        input_tensor_spec=input_tensor_spec,
        input_preprocessors=input_preprocessors,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        activation=activation,
        kernel_initializer=kernel_initializer)
    last_kernel_initializer = functools.partial(
        torch.nn.init.uniform_, a=-0.003, b=0.003)
    self._final_layer = layers.FC(
        self._encoding_net.output_spec.shape[0],
        num_actions,
        activation=math_ops.identity,
        kernel_initializer=last_kernel_initializer,
        bias_init_value=-0.2)
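# A usage sketch with illustrative specs: build a Q-network over a flat
# observation and pick greedy actions from its [batch_size, num_actions]
# output.
obs_spec = TensorSpec((10, ))
q_net = QNetwork(
    input_tensor_spec=obs_spec,
    action_spec=BoundedTensorSpec((), 'int64', minimum=0, maximum=3),
    fc_layer_params=(64, 64))
observations = obs_spec.randn(outer_dims=(32, ))
q_values, _ = q_net(observations)         # shape: [32, 4]
greedy_actions = q_values.argmax(dim=-1)  # shape: [32]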
def __init__(self,
             input_tensor_spec,
             conv_layer_params=None,
             fc_layer_params=None,
             activation=torch.relu_,
             last_layer_param=None,
             last_activation=None,
             noise_dim=32,
             hidden_layers=(64, 64),
             use_fc_bn=False,
             num_particles=10,
             entropy_regularization=1.,
             critic_optimizer=None,
             critic_hidden_layers=(100, 100),
             function_vi=False,
             function_bs=None,
             function_extra_bs_ratio=0.1,
             function_extra_bs_sampler='uniform',
             function_extra_bs_std=1.,
             loss_type="classification",
             voting="soft",
             par_vi="svgd",
             optimizer=None,
             logging_network=False,
             logging_training=False,
             logging_evaluate=False,
             config: TrainerConfig = None,
             name="HyperNetwork"):
    """
    Args:

        Args for the generated parametric network
        ====================================================================
        input_tensor_spec (nested TensorSpec): the (nested) tensor spec of
            the input. If nested, then ``preprocessing_combiner`` must not
            be None.
        conv_layer_params (tuple[tuple]): a tuple of tuples where each tuple
            takes a format ``(filters, kernel_size, strides, padding,
            pooling_kernel)``, where ``padding`` and ``pooling_kernel`` are
            optional.
        fc_layer_params (tuple[tuple]): a tuple of tuples where each tuple
            takes a format ``(FC layer size, use_bias)``, where ``use_bias``
            is optional.
        activation (nn.functional): activation used for all the layers but
            the last layer.
        last_layer_param (tuple): an optional tuple of the format
            ``(size, use_bias)``, where ``use_bias`` is optional; it appends
            an additional layer at the very end. Note that if
            ``last_activation`` is specified, ``last_layer_param`` has to be
            specified explicitly.
        last_activation (nn.functional): activation function of the
            additional layer specified by ``last_layer_param``. Note that if
            ``last_layer_param`` is not None, ``last_activation`` has to be
            specified explicitly.

        Args for the generator
        ====================================================================
        noise_dim (int): dimension of noise
        hidden_layers (tuple): sizes of hidden layers.
        use_fc_bn (bool): whether to use batch normalization for fc layers.
        num_particles (int): number of sampling particles
        entropy_regularization (float): weight of entropy regularization

        Args for the critic (used when par_vi is ``minmax``)
        ====================================================================
        critic_optimizer (torch.optim.Optimizer): the optimizer for training
            the critic.
        critic_hidden_layers (tuple): sizes of critic hidden layers.

        Args for function_vi
        ====================================================================
        function_vi (bool): whether to use function value based par_vi,
            currently supported by [``svgd2``, ``svgd3``, ``gfsf``].
        function_bs (int): mini batch size for par_vi training. Needed for
            critic initialization when function_vi is True.
        function_extra_bs_ratio (float): ratio of extra sampled batch size
            w.r.t. the function_bs.
        function_extra_bs_sampler (str): type of sampling method for the
            extra training batch, types are [``uniform``, ``normal``].
        function_extra_bs_std (float): std of the normal distribution for
            sampling the extra training batch when using the normal sampler.

        Args for training and testing
        ====================================================================
        loss_type (str): loglikelihood type for the generated functions,
            types are [``classification``, ``regression``]
        voting (str): types of voting results from sampled functions,
            types are [``soft``, ``hard``]
        par_vi (str): types of particle-based methods for variational
            inference, types are
            [``svgd``, ``svgd2``, ``svgd3``, ``gfsf``, ``minmax``],

            * svgd: empirical expectation of SVGD is evaluated by a single
              resampled particle. The main benefit of this choice is that it
              supports the conditional case, while all other options do not.
            * svgd2: empirical expectation of SVGD is evaluated by splitting
              half of the sampled batch. It is a trade-off between
              computational efficiency and convergence speed.
            * svgd3: empirical expectation of SVGD is evaluated by resampled
              particles of the same batch size. It has better convergence
              but involves resampling, so it is computationally less
              efficient compared with svgd2.
            * gfsf: wasserstein gradient flow with smoothed functions. It
              involves a kernel matrix inversion, so it is computationally
              the most expensive, but in some cases the convergence seems
              faster than the svgd approaches.
            * minmax: Fisher Neural Sampler; the optimal descent direction
              of the Stein discrepancy is solved by an inner optimization
              procedure in the space of L2 neural networks.
        optimizer (torch.optim.Optimizer): The optimizer for training the
            generator.
        logging_network (bool): whether to log the architectures of the
            networks.
        logging_training (bool): whether to log loss and acc during training.
        logging_evaluate (bool): whether to log loss and acc of evaluation.
        config (TrainerConfig): configuration for training
        name (str):
    """
    super().__init__(train_state_spec=(), optimizer=optimizer, name=name)
    param_net = ParamNetwork(
        input_tensor_spec=input_tensor_spec,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        activation=activation,
        last_layer_param=last_layer_param,
        last_activation=last_activation)
    gen_output_dim = param_net.param_length
    noise_spec = TensorSpec(shape=(noise_dim, ))
    net = EncodingNetwork(
        noise_spec,
        fc_layer_params=hidden_layers,
        use_fc_bn=use_fc_bn,
        last_layer_size=gen_output_dim,
        last_activation=math_ops.identity,
        name="Generator")
    if logging_network:
        logging.info("Generated network")
        logging.info("-" * 68)
        logging.info(param_net)
        logging.info("Generator network")
        logging.info("-" * 68)
        logging.info(net)
    if par_vi == 'svgd':
        par_vi = 'svgd3'
    if function_vi:
        assert par_vi in ('svgd2', 'svgd3', 'gfsf'), (
            "Function_vi is not supported for par_vi method: %s" % par_vi)
        assert function_bs is not None, (
            "Need to specify batch_size of function outputs.")
        assert function_extra_bs_sampler in ('uniform', 'normal'), (
            "Unsupported sampling type %s for extra training batch" %
            (function_extra_bs_sampler))
        self._function_extra_bs = math.ceil(
            function_bs * function_extra_bs_ratio)
        self._function_extra_bs_sampler = function_extra_bs_sampler
        self._function_extra_bs_std = function_extra_bs_std
        critic_input_dim = (
            function_bs + self._function_extra_bs) * last_layer_param[0]
    else:
        critic_input_dim = gen_output_dim
    self._generator = Generator(
        gen_output_dim,
        noise_dim=noise_dim,
        net=net,
        entropy_regularization=entropy_regularization,
        par_vi=par_vi,
        critic_input_dim=critic_input_dim,
        critic_hidden_layers=critic_hidden_layers,
        optimizer=None,
        critic_optimizer=critic_optimizer,
        name=name)
    self._param_net = param_net
    self._num_particles = num_particles
    self._entropy_regularization = entropy_regularization
    self._train_loader = None
    self._test_loader = None
    self._use_fc_bn = use_fc_bn
    self._loss_type = loss_type
    self._function_vi = function_vi
    self._logging_training = logging_training
    self._logging_evaluate = logging_evaluate
    self._config = config
    assert voting in ['soft', 'hard'], (
        'voting only supports "soft" and "hard"')
    self._voting = voting
    if loss_type == 'classification':
        self._loss_func = classification_loss
        self._vote = self._classification_vote
    elif loss_type == 'regression':
        self._loss_func = regression_loss
        self._vote = self._regression_vote
    else:
        raise ValueError("Unsupported loss_type: %s" % loss_type)
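# A sketch of the "soft" voting scheme named in the docstring above (the
# function and tensor names are illustrative): ensemble the predictions of
# all particles by averaging their class probabilities before the argmax.
import torch
import torch.nn.functional as F

def soft_vote(particle_logits):
    # particle_logits: [batch_size, num_particles, num_classes]
    probs = F.softmax(particle_logits, dim=-1).mean(dim=1)
    return probs.argmax(dim=-1)  # [batch_size]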
def __init__(self,
             input_tensor_spec,
             conv_layer_params=None,
             fc_layer_params=None,
             activation=torch.relu_,
             last_layer_param=None,
             last_activation=None,
             noise_dim=32,
             hidden_layers=(64, 64),
             use_fc_bn=False,
             num_particles=10,
             entropy_regularization=1.,
             loss_type="classification",
             voting="soft",
             par_vi="svgd",
             optimizer=None,
             logging_network=False,
             logging_training=False,
             logging_evaluate=False,
             config: TrainerConfig = None,
             name="HyperNetwork"):
    """
    Args:

        Args for the generated parametric network
        ====================================================================
        input_tensor_spec (nested TensorSpec): the (nested) tensor spec of
            the input. If nested, then ``preprocessing_combiner`` must not
            be None.
        conv_layer_params (tuple[tuple]): a tuple of tuples where each tuple
            takes a format ``(filters, kernel_size, strides, padding,
            pooling_kernel)``, where ``padding`` and ``pooling_kernel`` are
            optional.
        fc_layer_params (tuple[tuple]): a tuple of tuples where each tuple
            takes a format ``(FC layer size, use_bias)``, where ``use_bias``
            is optional.
        activation (nn.functional): activation used for all the layers but
            the last layer.
        last_layer_param (tuple): an optional tuple of the format
            ``(size, use_bias)``, where ``use_bias`` is optional; it appends
            an additional layer at the very end. Note that if
            ``last_activation`` is specified, ``last_layer_param`` has to be
            specified explicitly.
        last_activation (nn.functional): activation function of the
            additional layer specified by ``last_layer_param``. Note that if
            ``last_layer_param`` is not None, ``last_activation`` has to be
            specified explicitly.

        Args for the generator
        ====================================================================
        noise_dim (int): dimension of noise
        hidden_layers (tuple): sizes of hidden layers.
        use_fc_bn (bool): whether to use batch normalization for fc layers.
        num_particles (int): number of sampling particles
        entropy_regularization (float): weight of entropy regularization

        Args for training and testing
        ====================================================================
        loss_type (str): loglikelihood type for the generated functions,
            types are [``classification``, ``regression``]
        voting (str): types of voting results from sampled functions,
            types are [``soft``, ``hard``]
        par_vi (str): types of particle-based methods for variational
            inference, types are [``svgd``, ``svgd2``, ``svgd3``, ``gfsf``]
        optimizer (torch.optim.Optimizer): The optimizer for training.
        logging_network (bool): whether to log the architectures of the
            networks.
        logging_training (bool): whether to log loss and acc during training.
        logging_evaluate (bool): whether to log loss and acc of evaluation.
        config (TrainerConfig): configuration for training
        name (str):
    """
    super().__init__(train_state_spec=(), optimizer=optimizer, name=name)
    param_net = ParamNetwork(
        input_tensor_spec=input_tensor_spec,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        activation=activation,
        last_layer_param=last_layer_param,
        last_activation=last_activation)
    gen_output_dim = param_net.param_length
    noise_spec = TensorSpec(shape=(noise_dim, ))
    net = EncodingNetwork(
        noise_spec,
        fc_layer_params=hidden_layers,
        use_fc_bn=use_fc_bn,
        last_layer_size=gen_output_dim,
        last_activation=math_ops.identity,
        name="Generator")
    if logging_network:
        logging.info("Generated network")
        logging.info("-" * 68)
        logging.info(param_net)
        logging.info("Generator network")
        logging.info("-" * 68)
        logging.info(net)
    if par_vi == 'svgd':
        par_vi = 'svgd3'
    self._generator = Generator(
        gen_output_dim,
        noise_dim=noise_dim,
        net=net,
        entropy_regularization=entropy_regularization,
        par_vi=par_vi,
        optimizer=None,
        name=name)
    self._param_net = param_net
    self._num_particles = num_particles
    self._entropy_regularization = entropy_regularization
    self._train_loader = None
    self._test_loader = None
    self._use_fc_bn = use_fc_bn
    self._loss_type = loss_type
    self._logging_training = logging_training
    self._logging_evaluate = logging_evaluate
    self._config = config
    assert voting in ['soft', 'hard'], (
        'voting only supports "soft" and "hard"')
    self._voting = voting
    if loss_type == 'classification':
        self._loss_func = classification_loss
        self._vote = self._classification_vote
    elif loss_type == 'regression':
        self._loss_func = regression_loss
        self._vote = self._regression_vote
    else:
        raise ValueError("Unsupported loss_type: %s" % loss_type)
def __init__(self,
             output_dim,
             noise_dim=32,
             input_tensor_spec=None,
             hidden_layers=(256, ),
             net: Network = None,
             net_moving_average_rate=None,
             entropy_regularization=0.,
             mi_weight=None,
             mi_estimator_cls=MIEstimator,
             par_vi="gfsf",
             optimizer=None,
             name="Generator"):
    r"""Create a Generator.

    Args:
        output_dim (int): dimension of output
        noise_dim (int): dimension of noise
        input_tensor_spec (nested TensorSpec): spec of inputs. If there are
            no inputs, this should be None.
        hidden_layers (tuple): sizes of hidden layers.
        net (Network): network for generating outputs from [noise, inputs]
            or noise (if inputs is None). If None, a default one with
            hidden_layers will be created.
        net_moving_average_rate (float): If provided, use a moving average
            version of net to do prediction. This has been shown to be
            effective for GAN training (arXiv:1907.02544, arXiv:1812.04948).
        entropy_regularization (float): weight of entropy regularization
        mi_weight (float): weight of the mutual information loss.
        mi_estimator_cls (type): the class of the mutual information
            estimator for maximizing the mutual information between
            [noise, inputs] and [outputs, inputs].
        par_vi (string): ParVI methods, options are
            [``svgd``, ``svgd2``, ``svgd3``, ``gfsf``],

            * svgd: empirical expectation of SVGD is evaluated by a single
              resampled particle. The main benefit of this choice is that it
              supports the conditional case, while all other options do not.
            * svgd2: empirical expectation of SVGD is evaluated by splitting
              half of the sampled batch. It is a trade-off between
              computational efficiency and convergence speed.
            * svgd3: empirical expectation of SVGD is evaluated by resampled
              particles of the same batch size. It has better convergence
              but involves resampling, so it is computationally less
              efficient compared with svgd2.
            * gfsf: wasserstein gradient flow with smoothed functions. It
              involves a kernel matrix inversion, so it is computationally
              the most expensive, but in some cases the convergence seems
              faster than the svgd approaches.
        optimizer (torch.optim.Optimizer): (optional) optimizer for training
        name (str): name of this generator
    """
    super().__init__(train_state_spec=(), optimizer=optimizer, name=name)
    self._noise_dim = noise_dim
    self._entropy_regularization = entropy_regularization
    self._par_vi = par_vi
    if entropy_regularization == 0:
        self._grad_func = self._ml_grad
    else:
        if par_vi == 'gfsf':
            self._grad_func = self._gfsf_grad
        elif par_vi == 'svgd':
            self._grad_func = self._svgd_grad
        elif par_vi == 'svgd2':
            self._grad_func = self._svgd_grad2
        elif par_vi == 'svgd3':
            self._grad_func = self._svgd_grad3
        else:
            raise ValueError("Unsupported par_vi method: %s" % par_vi)
        self._kernel_width_averager = AdaptiveAverager(
            tensor_spec=TensorSpec(shape=()))
    noise_spec = TensorSpec(shape=(noise_dim, ))
    if net is None:
        net_input_spec = noise_spec
        if input_tensor_spec is not None:
            net_input_spec = [net_input_spec, input_tensor_spec]
        net = EncodingNetwork(
            input_tensor_spec=net_input_spec,
            fc_layer_params=hidden_layers,
            last_layer_size=output_dim,
            last_activation=math_ops.identity,
            name="Generator")
    self._mi_estimator = None
    self._input_tensor_spec = input_tensor_spec
    if mi_weight is not None:
        x_spec = noise_spec
        y_spec = TensorSpec((output_dim, ))
        if input_tensor_spec is not None:
            x_spec = [x_spec, input_tensor_spec]
        self._mi_estimator = mi_estimator_cls(
            x_spec, y_spec, sampler='shift')
        self._mi_weight = mi_weight
    self._net = net
    self._predict_net = None
    self._net_moving_average_rate = net_moving_average_rate
    if net_moving_average_rate:
        self._predict_net = net.copy(name="Generator_average")
        self._predict_net_updater = common.get_target_updater(
            self._net, self._predict_net, tau=net_moving_average_rate)
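# A generic sketch of an SVGD update direction (Liu & Wang, 2016), the family
# of methods listed in the par_vi docstring above; this illustrates the
# technique itself, not the class's _svgd_grad* implementations.
import torch

def svgd_direction(particles, log_prob_grads, bandwidth=1.0):
    # particles: [n, d]; log_prob_grads: [n, d], gradients of log p at each
    # particle. Returns the per-particle update direction phi(x_i).
    diff = particles.unsqueeze(1) - particles.unsqueeze(0)  # [n, n, d]
    sq_dist = (diff**2).sum(-1)                             # [n, n]
    kernel = torch.exp(-sq_dist / (2 * bandwidth**2))       # RBF kernel
    # attractive term: kernel-weighted average of log-density gradients
    attract = kernel @ log_prob_grads
    # repulsive term: gradient of the kernel, pushing particles apart
    repulse = (kernel.unsqueeze(-1) * diff / bandwidth**2).sum(1)
    return (attract + repulse) / particles.shape[0]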
def __init__(self,
             action_spec,
             encoders,
             decoders,
             num_read_keys=3,
             lstm_size=(256, 256),
             latent_dim=200,
             memory_size=1350,
             loss_weight=1.0,
             name="mbp"):
    """
    Args:
        action_spec (nested BoundedTensorSpec): representing the actions.
        encoders (nested Network): the nest should match observation_spec
        decoders (nested Algorithm): the nest should match observation_spec
        num_read_keys (int): number of keys for reading memory.
        lstm_size (list[int]): sizes of lstm layers for MBP and MBA.
        latent_dim (int): the dimension of the hidden representation of VAE.
        memory_size (int): number of memory slots
        loss_weight (float): weight for the loss
        name (str): name of the algorithm.
    """
    action_encoder = SimpleActionEncoder(action_spec)
    memory = MemoryWithUsage(
        latent_dim, memory_size, name=name + "/memory")
    rnn_input_size = (latent_dim + num_read_keys * latent_dim +
                      action_encoder.output_spec.shape[0])
    rnn = LSTMEncodingNetwork(
        input_tensor_spec=alf.TensorSpec((rnn_input_size, )),
        hidden_size=lstm_size,
        name=name + "/lstm")
    state_spec = MBPState(
        latent_vector=alf.TensorSpec((latent_dim, )),
        mem_readout=alf.TensorSpec((num_read_keys * latent_dim, )),
        rnn_state=rnn.state_spec,
        memory=memory.state_spec)
    super().__init__(train_state_spec=state_spec, name=name)
    self._encoders = encoders
    self._decoders = decoders
    self._action_encoder = action_encoder
    self._rnn = rnn
    self._memory = memory
    self._key_net = self._memory.create_keynet(rnn.output_spec,
                                               num_read_keys)
    prior_network = EncodingNetwork(
        input_tensor_spec=(rnn.output_spec, state_spec.mem_readout),
        preprocessing_combiner=alf.nest.utils.NestConcat(),
        fc_layer_params=(2 * latent_dim, 2 * latent_dim),
        activation=torch.tanh,
        last_layer_size=2 * latent_dim,
        last_activation=math_ops.identity,
        name=name + "/prior_network")
    encoder_output_specs = alf.nest.map_structure(
        lambda encoder: encoder.output_spec, self._encoders)
    self._vae = VariationalAutoEncoder(
        latent_dim,
        input_tensor_spec=encoder_output_specs,
        z_prior_network=prior_network,
        name=name + "/vae")
    self._loss_weight = loss_weight
def __init__(self,
             action_spec,
             observation_spec=None,
             hidden_size=256,
             reward_adapt_speed=8.0,
             encoding_net: EncodingNetwork = None,
             forward_net: EncodingNetwork = None,
             inverse_net: EncodingNetwork = None,
             activation=torch.relu_,
             optimizer=None,
             name="ICMAlgorithm"):
    """Create an ICMAlgorithm.

    Args:
        action_spec (nested TensorSpec): agent's action spec
        observation_spec (nested TensorSpec): agent's observation spec. If
            not None, then a normalizer will be used to normalize the
            observation.
        hidden_size (int or tuple[int]): size of hidden layer(s)
        reward_adapt_speed (float): how fast to adapt the reward normalizer.
            Roughly speaking, the statistics for the normalization is
            calculated mostly based on the most recent T/speed samples,
            where T is the total number of samples.
        encoding_net (Network): network for encoding observation into a
            latent feature. Its input is the same as the input of this
            algorithm.
        forward_net (Network): network for predicting the next feature based
            on the previous feature and action. It should accept input with
            spec [feature_spec, encoded_action_spec] and output a tensor of
            shape feature_spec. For discrete action, encoded_action is a
            one-hot representation of the action. For continuous action,
            encoded_action is the same as the original action.
        inverse_net (Network): network for predicting the previous action
            given the previous feature and current feature. It should accept
            input with spec [feature_spec, feature_spec] and output a tensor
            of shape (num_actions,).
        activation (torch.nn.functional): activation used for constructing
            any of the forward net and inverse net, if not provided.
        optimizer (torch.optim.Optimizer): The optimizer for training
        name (str):
    """
    if encoding_net is not None:
        feature_spec = encoding_net.output_spec
    else:
        feature_spec = observation_spec
    super(ICMAlgorithm, self).__init__(
        train_state_spec=feature_spec,
        predict_state_spec=(),
        optimizer=optimizer,
        name=name)
    flat_action_spec = alf.nest.flatten(action_spec)
    assert len(
        flat_action_spec) == 1, "ICM doesn't support nested action_spec"
    flat_feature_spec = alf.nest.flatten(feature_spec)
    assert len(
        flat_feature_spec) == 1, "ICM doesn't support nested feature_spec"
    action_spec = flat_action_spec[0]
    if action_spec.is_discrete:
        self._num_actions = int(action_spec.maximum - action_spec.minimum +
                                1)
    else:
        self._num_actions = action_spec.shape[-1]
    self._action_spec = action_spec
    self._observation_normalizer = None
    if observation_spec is not None:
        self._observation_normalizer = AdaptiveNormalizer(
            tensor_spec=observation_spec)
    feature_dim = flat_feature_spec[0].shape[-1]
    self._encoding_net = encoding_net
    if isinstance(hidden_size, int):
        hidden_size = (hidden_size, )
    if forward_net is None:
        encoded_action_spec = TensorSpec((self._num_actions, ),
                                         dtype=torch.float32)
        forward_net = EncodingNetwork(
            name="forward_net",
            input_tensor_spec=[feature_spec, encoded_action_spec],
            preprocessing_combiner=NestConcat(),
            fc_layer_params=hidden_size,
            activation=activation,
            last_layer_size=feature_dim,
            last_activation=math_ops.identity)
    self._forward_net = forward_net
    if inverse_net is None:
        inverse_net = EncodingNetwork(
            name="inverse_net",
            input_tensor_spec=[feature_spec, feature_spec],
            preprocessing_combiner=NestConcat(),
            fc_layer_params=hidden_size,
            activation=activation,
            last_layer_size=self._num_actions,
            last_activation=math_ops.identity,
            last_kernel_initializer=torch.nn.init.zeros_)
    self._inverse_net = inverse_net
    self._reward_normalizer = ScalarAdaptiveNormalizer(
        speed=reward_adapt_speed)
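# A sketch of ICM's curiosity reward (Pathak et al., 2017, arXiv:1705.05363):
# the intrinsic reward is the forward model's prediction error in feature
# space. The names below are illustrative, not this class's exact train_step
# code.
import torch

def icm_intrinsic_reward(forward_net, feature, encoded_action, next_feature):
    # ALF networks return (output, state); the state is unused here.
    pred_next_feature, _ = forward_net([feature, encoded_action])
    return 0.5 * torch.sum((pred_next_feature - next_feature)**2, dim=-1)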
def test_conditional_vae(self):
    """Test for a one-dimensional Gaussian, conditioned on a Bernoulli
    variable.
    """
    prior_input_spec = BoundedTensorSpec((), 'int64')

    z_prior_network = EncodingNetwork(
        TensorSpec(
            (prior_input_spec.maximum - prior_input_spec.minimum + 1, )),
        fc_layer_params=(10, ) * 2,
        last_layer_size=2 * self._latent_dim,
        last_activation=math_ops.identity)
    preprocess_network = EncodingNetwork(
        input_tensor_spec=(
            z_prior_network.input_tensor_spec,
            self._input_spec,
            z_prior_network.output_spec,
        ),
        preprocessing_combiner=NestConcat(),
        fc_layer_params=(10, ) * 2,
        last_layer_size=self._latent_dim,
        last_activation=math_ops.identity)

    encoder = vae.VariationalAutoEncoder(
        self._latent_dim,
        preprocess_network=preprocess_network,
        z_prior_network=z_prior_network)
    decoding_layers = FC(self._latent_dim, 1)

    optimizer = torch.optim.Adam(
        list(encoder.parameters()) + list(decoding_layers.parameters()),
        lr=0.1)

    x_train = self._input_spec.randn(outer_dims=(10000, ))
    y_train = x_train.clone()
    y_train[:5000] = y_train[:5000] + 1.0
    pr_train = torch.cat([
        prior_input_spec.zeros(outer_dims=(5000, )),
        prior_input_spec.ones(outer_dims=(5000, ))
    ], dim=0)

    x_test = self._input_spec.randn(outer_dims=(100, ))
    y_test = x_test.clone()
    y_test[:50] = y_test[:50] + 1.0
    pr_test = torch.cat([
        prior_input_spec.zeros(outer_dims=(50, )),
        prior_input_spec.ones(outer_dims=(50, ))
    ], dim=0)
    pr_test = torch.nn.functional.one_hot(
        pr_test,
        int(z_prior_network.input_tensor_spec.shape[0])).to(torch.float32)

    for _ in range(self._epochs):
        idx = torch.randperm(x_train.shape[0])
        x_train = x_train[idx]
        y_train = y_train[idx]
        pr_train = pr_train[idx]
        for i in range(0, x_train.shape[0], self._batch_size):
            optimizer.zero_grad()
            batch = x_train[i:i + self._batch_size]
            y_batch = y_train[i:i + self._batch_size]
            pr_batch = torch.nn.functional.one_hot(
                pr_train[i:i + self._batch_size],
                int(z_prior_network.input_tensor_spec.shape[0])).to(
                    torch.float32)
            alg_step = encoder.train_step([pr_batch, batch])
            outputs = decoding_layers(alg_step.output)
            loss = torch.mean(100 * self._loss_f(y_batch - outputs) +
                              alg_step.info.loss)
            loss.backward()
            optimizer.step()

    y_hat_test = decoding_layers(
        encoder.train_step([pr_test, x_test]).output)
    reconstruction_loss = float(
        torch.mean(self._loss_f(y_test - y_hat_test)))
    print("reconstruction_loss:", reconstruction_loss)
    self.assertLess(reconstruction_loss, 0.05)