def __init__(
    self,
    *,
    input_size: int,
    filters: Tuple[Tuple[int]] = (
        (1024, 5, 2),
        (128, 5, 2),
        (64, 6, 2),
        (32, 6, 2),
    ),
    initializer="default",
    bias_init=0,
    activation_fn: str = "relu",
    output_shape: Tuple[int] = (3, 64, 64),
):
    """Initializes a TransposedConv2DStack instance.

    Args:
        input_size: The size of the 1D input vector, from which to
            generate the image distribution.
        filters (Tuple[Tuple[int]]): One (in_channels, kernel, stride)
            tuple per ConvTranspose2D layer.
        initializer (Union[str]): Weight-initializer descriptor, resolved
            via `get_initializer`.
        bias_init: The constant bias value every conv layer starts with.
        activation_fn: Activation function descriptor (str); applied
            after every layer except the last.
        output_shape (Tuple[int]): Shape of the final output image; its
            channel count becomes the last layer's out_channels.
    """
    super().__init__()
    self.activation = get_activation_fn(activation_fn, framework="torch")
    self.output_shape = output_shape
    weight_init = get_initializer(initializer, framework="torch")

    # Channel plan: layer i maps filters[i][0] -> filters[i + 1][0];
    # the final layer maps to the output image's channel count instead.
    in_sizes = [spec[0] for spec in filters]
    out_sizes = in_sizes[1:] + [output_shape[0]]

    # Project the flat input vector, then reshape it into a 1x1
    # channels-first "image" for the transposed-conv stack to upsample.
    self.layers = [
        nn.Linear(input_size, in_sizes[0]),
        Reshape([-1, in_sizes[0], 1, 1]),
    ]

    last_idx = len(filters) - 1
    for idx, ((_, kernel, stride), in_ch, out_ch) in enumerate(
        zip(filters, in_sizes, out_sizes)
    ):
        deconv = nn.ConvTranspose2d(in_ch, out_ch, kernel, stride)
        # Apply the configured weight initializer and constant bias.
        weight_init(deconv.weight)
        nn.init.constant_(deconv.bias, bias_init)
        self.layers.append(deconv)
        # No activation after the final, image-producing layer.
        if self.activation is not None and idx < last_idx:
            self.layers.append(self.activation())

    self._model = nn.Sequential(*self.layers)
def __init__(
    self,
    policy: Policy,
    gamma: float,
    model: ModelConfigDict = None,
    n_iters: int = 160,
    lr: float = 1e-3,
    delta: float = 1e-4,
    clip_grad_norm: float = 100.0,
    batch_size: int = 32,
) -> None:
    """
    Args:
        policy: Policy to evaluate.
        gamma: Discount factor of the environment.
        model: The ModelConfigDict for self.q_model, defaults to:
            {
                "fcnet_hiddens": [8, 8],
                "fcnet_activation": "relu",
                "vf_share_layers": True,
            },
        n_iters: Maximum number of training iterations to run on the batch
        lr: Learning rate for Q-function optimizer
        delta: Early stopping if the mean loss < delta
        clip_grad_norm: Clip gradients to this maximum value
        batch_size: Minibatch size for training Q-function
    """
    self.policy = policy
    # Q-values are enumerated per action below (self.action_space.n), so
    # only discrete action spaces are supported; fail fast with a clear
    # message instead of an opaque AttributeError on `.n`.
    assert isinstance(
        policy.action_space, Discrete
    ), f"{self.__class__.__name__} only supports discrete action spaces!"
    self.gamma = gamma
    self.observation_space = policy.observation_space
    self.action_space = policy.action_space
    # Fall back to a small default fully-connected Q-network config.
    if model is None:
        model = {
            "fcnet_hiddens": [8, 8],
            "fcnet_activation": "relu",
            "vf_share_layers": True,
        }
    self.device = self.policy.device
    # Q-network outputs one value per (discrete) action.
    self.q_model: TorchModelV2 = ModelCatalog.get_model_v2(
        self.observation_space,
        self.action_space,
        self.action_space.n,
        model,
        framework="torch",
        name="TorchQModel",
    ).to(self.device)
    self.n_iters = n_iters
    self.lr = lr
    self.delta = delta
    self.clip_grad_norm = clip_grad_norm
    self.batch_size = batch_size
    self.optimizer = torch.optim.Adam(self.q_model.variables(), self.lr)
    initializer = get_initializer("xavier_uniform", framework="torch")

    # Stored (re-)initializer: xavier-uniform for all fully-connected
    # layers, applied later via `model.apply(self.initializer)`.
    def f(m):
        if isinstance(m, nn.Linear):
            initializer(m.weight)

    self.initializer = f
def __init__(
    self,
    policy: Policy,
    gamma: float,
    model: ModelConfigDict = None,
    n_iters: int = 1,
    lr: float = 1e-3,
    delta: float = 1e-4,
    clip_grad_norm: float = 100.0,
    minibatch_size: int = None,
    tau: float = 1.0,
) -> None:
    """
    Args:
        policy: Policy to evaluate.
        gamma: Discount factor of the environment.
        model: The ModelConfigDict for self.q_model, defaults to:
            {
                "fcnet_hiddens": [8, 8],
                "fcnet_activation": "relu",
                "vf_share_layers": True,
            },
        n_iters: Number of gradient steps to run on batch, defaults to 1
        lr: Learning rate for Q-model optimizer
        delta: Early stopping threshold if the mean loss < delta
        clip_grad_norm: Clip gradients to this maximum value
        minibatch_size: Minibatch size for training Q-function;
            if None, train on the whole batch
        tau: Polyak averaging factor for target Q-function
    """
    self.policy = policy
    # Q-values are enumerated per action below (self.action_space.n),
    # hence only discrete action spaces can be handled.
    assert isinstance(
        policy.action_space, Discrete
    ), f"{self.__class__.__name__} only supports discrete action spaces!"

    self.gamma = gamma
    self.observation_space = policy.observation_space
    self.action_space = policy.action_space
    self.device = self.policy.device

    # Fall back to a small default fully-connected Q-network config.
    if model is None:
        model = {
            "fcnet_hiddens": [8, 8],
            "fcnet_activation": "relu",
            "vf_share_layers": True,
        }

    # Both the online Q-network and its target copy share an identical
    # construction; only the registered model name differs.
    def _build_q_net(net_name: str) -> TorchModelV2:
        return ModelCatalog.get_model_v2(
            self.observation_space,
            self.action_space,
            self.action_space.n,
            model,
            framework="torch",
            name=net_name,
        ).to(self.device)

    self.q_model: TorchModelV2 = _build_q_net("TorchQModel")
    self.target_q_model: TorchModelV2 = _build_q_net("TargetTorchQModel")

    self.n_iters = n_iters
    self.lr = lr
    self.delta = delta
    self.clip_grad_norm = clip_grad_norm
    self.minibatch_size = minibatch_size
    self.tau = tau
    self.optimizer = torch.optim.Adam(self.q_model.variables(), self.lr)

    xavier = get_initializer("xavier_uniform", framework="torch")
    # Start with target network == online network (hard update).
    self.update_target(tau=1.0)

    # Stored (re-)initializer: xavier-uniform on every fully-connected
    # layer's weights, applied later via `model.apply(self.initializer)`.
    def _reinit_linear(m):
        if isinstance(m, nn.Linear):
            xavier(m.weight)

    self.initializer = _reinit_linear