def build_q_net(name_):
    """Assemble one Q-network as an ``nn.Sequential`` of ``SlimFC`` layers.

    Relies on ``self``, ``critic_hiddens`` and ``critic_hidden_activation``
    from the enclosing scope.

    Args:
        name_: Prefix for the module names (``<name_>_hidden_<i>`` for the
            hidden layers and ``<name_>_out`` for the output layer).

    Returns:
        The assembled network. Its last layer has one output unit and no
        activation function.
    """
    hidden_act = get_activation_fn(critic_hidden_activation, framework="torch")
    q_net = nn.Sequential()

    # The first layer consumes the obs features concatenated with the
    # action vector; every following layer consumes its predecessor's output.
    widths = [self.obs_ins + self.action_dim, *critic_hiddens]
    for idx, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
        q_net.add_module(
            f"{name_}_hidden_{idx}",
            SlimFC(
                fan_in,
                fan_out,
                initializer=torch.nn.init.xavier_uniform_,
                activation_fn=hidden_act,
            ),
        )

    # Linear single-unit head.
    q_net.add_module(
        f"{name_}_out",
        SlimFC(
            widths[-1],
            1,
            initializer=torch.nn.init.xavier_uniform_,
            activation_fn=None,
        ),
    )
    return q_net
def _build_q_net(self, name_):
    """Build a Q-network from the ``critic_*`` entries of ``model_config``.

    For discrete action spaces the net takes only the obs features and
    emits ``action_space.n`` values. Otherwise it takes obs features
    concatenated with the action vector and emits a single value.

    Args:
        name_: Prefix used when naming the sub-modules.

    Returns:
        An ``nn.Sequential`` of ``SlimFC`` layers.
    """
    act_name = self.model_config["critic_hidden_activation"]
    hidden_sizes = self.model_config["critic_hiddens"]
    hidden_act = get_activation_fn(act_name, framework="torch")

    q_net = nn.Sequential()
    if self._is_action_discrete:
        fan_in = self.obs_ins
    else:
        # Actions are concatenated with the flattened obs.
        fan_in = self.obs_ins + self.action_dim

    for idx, width in enumerate(hidden_sizes):
        hidden = SlimFC(
            fan_in,
            width,
            initializer=torch.nn.init.xavier_uniform_,
            activation_fn=hidden_act,
        )
        q_net.add_module(f"{name_}_hidden_{idx}", hidden)
        fan_in = width

    if self._is_action_discrete:
        n_out = self.action_space.n
    else:
        n_out = 1
    head = SlimFC(
        fan_in,
        n_out,
        initializer=torch.nn.init.xavier_uniform_,
        activation_fn=None,
    )
    q_net.add_module(f"{name_}_out", head)
    return q_net
def __init__(self,
             in_size: int,
             out_size: int,
             initializer: Any = None,
             activation_fn: Any = None,
             use_bias: bool = True,
             bias_init: float = 0.0):
    """Creates a standard FC layer, similar to torch.nn.Linear.

    Args:
        in_size: Input size for the FC layer.
        out_size: Output size for the FC layer.
        initializer: Initializer function applied to the layer's weight
            tensor. Defaults to ``nn.init.xavier_uniform_`` when None.
        activation_fn: Activation appended after the linear layer. May be
            a string (resolved through ``get_activation_fn``), a callable
            that is invoked without arguments to build the activation
            module, or None for a purely linear layer.
        use_bias: Whether to add bias weights or not.
        bias_init: Constant to initialize the bias weights with.
    """
    super(SlimFC, self).__init__()
    layers = []

    # Actual nn.Linear layer (including correct initialization logic).
    linear = nn.Linear(in_size, out_size, bias=use_bias)
    if initializer is None:
        initializer = nn.init.xavier_uniform_
    initializer(linear.weight)
    # Key the bias init on whether a bias tensor actually exists rather
    # than on `use_bias is True`, so truthy non-bool flags (e.g. 1 or
    # np.bool_) also receive `bias_init`.
    if linear.bias is not None:
        nn.init.constant_(linear.bias, bias_init)
    layers.append(linear)

    # Activation function (if any; default=None (linear)).
    if isinstance(activation_fn, str):
        activation_fn = get_activation_fn(activation_fn, "torch")
    if activation_fn is not None:
        layers.append(activation_fn())

    # Put everything in sequence.
    self._model = nn.Sequential(*layers)
def __init__(
    self,
    *,
    input_size: int,
    filters: Tuple[Tuple[int]] = (
        (1024, 5, 2),
        (128, 5, 2),
        (64, 6, 2),
        (32, 6, 2),
    ),
    initializer="default",
    bias_init=0,
    activation_fn: str = "relu",
    output_shape: Tuple[int] = (3, 64, 64)
):
    """Initializes a TransposedConv2DStack instance.

    Args:
        input_size: Size of the 1D input vector that is mapped to the
            first filter's channel count.
        filters: One ``(in_channels, kernel, stride)`` entry per
            ConvTranspose2d layer.
        initializer: Weight-initializer specifier, resolved through
            ``get_initializer`` and applied to each ConvTranspose2d weight.
        bias_init: Constant each ConvTranspose2d bias is set to.
        activation_fn: Activation function descriptor (str).
        output_shape: Shape of the final output image; its first entry is
            the channel count of the last layer.
    """
    super().__init__()
    self.activation = get_activation_fn(activation_fn, framework="torch")
    self.output_shape = output_shape
    weight_init = get_initializer(initializer, framework="torch")

    first_channels = filters[0][0]
    self.layers = [
        # Project the 1D input onto the channel count of the first filter.
        nn.Linear(input_size, first_channels),
        # View the projected vector as a 1x1 image (channels first).
        Reshape([-1, first_channels, 1, 1]),
    ]

    # Layer k maps channels[k] -> channels[k + 1]; the final layer emits
    # the number of channels requested by `output_shape`.
    channels = [spec[0] for spec in filters] + [output_shape[0]]
    last_idx = len(filters) - 1
    for idx, (_, kernel, stride) in enumerate(filters):
        deconv = nn.ConvTranspose2d(
            channels[idx], channels[idx + 1], kernel, stride)
        weight_init(deconv.weight)
        nn.init.constant_(deconv.bias, bias_init)
        self.layers.append(deconv)
        # Every layer but the last is followed by the activation (if any).
        if idx < last_idx and self.activation is not None:
            self.layers.append(self.activation())

    self._model = nn.Sequential(*self.layers)
def __init__(
    self,
    in_channels: int,
    out_channels: int,
    kernel: Union[int, Tuple[int, int]],
    stride: Union[int, Tuple[int, int]],
    padding: Union[int, Tuple[int, int]],
    # Defaulting these to nn.[..] will break soft torch import.
    initializer: Any = "default",
    activation_fn: Any = "default",
    bias_init: float = 0,
):
    """Creates a standard Conv2d layer, similar to torch.nn.Conv2d.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel: Kernel size, forwarded to ``nn.Conv2d``.
        stride: Stride, forwarded to ``nn.Conv2d``.
        padding: If truthy, an ``nn.ZeroPad2d(padding)`` layer is placed
            in front of the convolution.
        initializer: Initializer function for the kernel weights;
            "default" selects ``nn.init.xavier_uniform_``. A falsy value
            skips weight initialization.
        activation_fn: Activation appended after the convolution;
            "default" selects ``nn.ReLU``, other strings are resolved
            through ``get_activation_fn``, None means no activation.
        bias_init: Constant to initialize the bias weights with.
    """
    super(SlimConv2d, self).__init__()
    modules = []

    # Optional explicit zero-padding in front of the convolution.
    if padding:
        modules.append(nn.ZeroPad2d(padding))

    # The convolution itself, with weight/bias initialization.
    conv = nn.Conv2d(in_channels, out_channels, kernel, stride)
    if initializer:
        weight_init = (
            nn.init.xavier_uniform_
            if initializer == "default" else initializer
        )
        weight_init(conv.weight)
    nn.init.constant_(conv.bias, bias_init)
    modules.append(conv)

    # Resolve string specifiers to an activation factory.
    if isinstance(activation_fn, str):
        activation_fn = (
            nn.ReLU if activation_fn == "default"
            else get_activation_fn(activation_fn, "torch")
        )
    if activation_fn is not None:
        modules.append(activation_fn())

    self._model = nn.Sequential(*modules)
def feed_forward(self, obs, policy_vars, policy_config):
    """Run `obs` through FC policy/value nets rebuilt from `policy_vars`.

    Hacky for now: reconstructs the FC networks from the (adapted) weights
    instead of calling the model. @mluo: TODO for any network.

    Variables are routed by name: names containing "value" form the value
    net, a name containing "log_std" is taken as the free log-std tensor,
    and everything else forms the policy net. Within each net, variables
    are applied in dict iteration order.

    Args:
        obs: Input fed to both reconstructed networks.
        policy_vars: Mapping from variable name to tensor.
        policy_config: Config dict; "fcnet_activation" selects the hidden
            nonlinearity.

    Returns:
        Tuple of (policy outputs, value outputs reshaped to 1D). If a
        log-std variable is present, it is broadcast and concatenated to
        the policy outputs along axis 1.

    Raises:
        NameError: If a network variable name contains neither "kernel"
            nor "bias".
    """

    def fc_network(
        inp, network_vars, hidden_nonlinearity, output_nonlinearity, policy_config
    ):
        x = inp
        for name, param in network_vars.items():
            if "kernel" in name:
                x = tf.matmul(x, param)
            elif "bias" in name:
                x = tf.add(x, param)
                # A bias completes a layer: apply that layer's
                # nonlinearity right after it.
                if "out" in name:
                    x = output_nonlinearity(x)
                else:
                    x = hidden_nonlinearity(x)
            else:
                raise NameError(
                    "Cannot rebuild FC network: variable {!r} is neither "
                    "a kernel nor a bias.".format(name)
                )
        return x

    policyn_vars = {}
    valuen_vars = {}
    log_std = None
    for name, param in policy_vars.items():
        if "value" in name:
            valuen_vars[name] = param
        elif "log_std" in name:
            log_std = param
        else:
            policyn_vars[name] = param

    output_nonlinearity = tf.identity
    hidden_nonlinearity = get_activation_fn(policy_config["fcnet_activation"])
    # `get_activation_fn` may yield None (no activation); fall back to the
    # identity so the hidden layers stay callable.
    if hidden_nonlinearity is None:
        hidden_nonlinearity = tf.identity

    pi_new_logits = fc_network(
        obs, policyn_vars, hidden_nonlinearity, output_nonlinearity, policy_config
    )
    if log_std is not None:
        # `0.0 * logits + log_std` broadcasts log_std to the logits' shape.
        pi_new_logits = tf.concat([pi_new_logits, 0.0 * pi_new_logits + log_std], 1)
    value_fn = fc_network(
        obs, valuen_vars, hidden_nonlinearity, output_nonlinearity, policy_config
    )

    return pi_new_logits, tf.reshape(value_fn, [-1])
def __init__(self,
             in_size: int,
             out_size: int,
             sigma0: float,
             activation: str = "relu"):
    """Initializes a NoisyLayer object.

    Registers four parameters: ``sigma_w`` and ``w`` of shape
    ``[in_size, out_size]`` plus ``sigma_b`` and ``b`` of shape
    ``[out_size]``.

    Args:
        in_size: Input size for the noisy layer.
        out_size: Output size for the noisy layer.
        sigma0: Scales the initial value of ``sigma_b`` (bias noise).
        activation: Activation specifier, resolved through
            ``get_activation_fn``.
    """
    super().__init__()

    self.in_size = in_size
    self.out_size = out_size
    self.sigma0 = sigma0
    act_factory = get_activation_fn(activation, framework="torch")
    self.activation = act_factory() if act_factory is not None else None

    def _as_param(values):
        # numpy float64 array -> float32 torch parameter.
        return nn.Parameter(torch.from_numpy(values).float())

    sqrt_in = np.sqrt(float(self.in_size))

    # Weight noise scale: uniform in [-1/sqrt(in_size), 1/sqrt(in_size)].
    self.register_parameter(
        "sigma_w",
        _as_param(
            np.random.uniform(
                low=-1.0 / sqrt_in,
                high=1.0 / sqrt_in,
                size=[self.in_size, out_size],
            )),
    )
    # Bias noise scale: constant sigma0 / sqrt(in_size).
    self.register_parameter(
        "sigma_b",
        _as_param(np.full(shape=[out_size], fill_value=sigma0 / sqrt_in)),
    )
    # Weights: constant 6 / sqrt(in_size + out_size).
    self.register_parameter(
        "w",
        _as_param(
            np.full(
                shape=[self.in_size, self.out_size],
                fill_value=6 / np.sqrt(float(in_size) + float(out_size)),
            )),
    )
    # Biases start at zero.
    self.register_parameter("b", _as_param(np.zeros([out_size])))
def call(self, inputs: TensorType) -> TensorType:
    """Apply the noisy linear transform (plus activation) to `inputs`.

    Draws fresh normal noise per call, passes it through
    ``self._f_epsilon`` and perturbs the weights and biases with it.

    Args:
        inputs: Input tensor; its dim 1 is used as the input size.

    Returns:
        The (optionally activated) layer output.
    """
    n_in = int(inputs.shape[1])

    # Sample both noise vectors first, then transform them.
    raw_in = tf.random.normal(shape=[n_in])
    raw_out = tf.random.normal(shape=[self.out_size])
    eps_in = self._f_epsilon(raw_in)
    eps_out = self._f_epsilon(raw_out)

    # Weight noise is the outer product of the two vectors; the output
    # noise vector doubles as the bias noise.
    eps_w = tf.matmul(
        a=tf.expand_dims(eps_in, -1), b=tf.expand_dims(eps_out, 0))
    noisy_w = self.w + self.sigma_w * eps_w

    out = tf.matmul(inputs, noisy_w) + self.b + self.sigma_b * eps_out

    act = get_activation_fn(self.activation, framework="tf")
    if act is None:
        return out
    return act(out)
def _create_fc_net(self, layer_dims, activation, name=None):
    """Builds a fully connected net from a list of layer dimensions.

    The first entry of `layer_dims` is the input dimension; each further
    entry adds one dense layer. All layers but the last use `activation`;
    the last one is linear.

    Args:
        layer_dims (Tuple[int]): Layer dims, including the input dim.
        activation (str): An activation specifier string (e.g. "relu").
        name: Prefix for the tf layer names (unused for torch).

    Returns:
        An ``nn.Sequential`` if ``self.framework`` is "torch", else a
        ``tf.keras.Sequential``.

    Examples:
        layer_dims=[4, 8, 6] yields two layers, 4->8 (with activation)
        and 8->6 (without activation).
    """
    use_torch = self.framework == "torch"

    if use_torch:
        layers = []
    else:
        layers = [
            tf.keras.layers.Input(
                shape=(layer_dims[0],), name="{}_in".format(name))
        ]

    last_idx = len(layer_dims) - 2
    for idx, (fan_in, fan_out) in enumerate(
            zip(layer_dims[:-1], layer_dims[1:])):
        # The final layer stays linear.
        act = activation if idx < last_idx else None
        if use_torch:
            layer = SlimFC(
                in_size=fan_in,
                out_size=fan_out,
                initializer=torch.nn.init.xavier_uniform_,
                activation_fn=act,
            )
        else:
            layer = tf.keras.layers.Dense(
                units=fan_out,
                activation=get_activation_fn(act),
                name="{}_{}".format(name, idx),
            )
        layers.append(layer)

    if use_torch:
        return nn.Sequential(*layers)
    return tf.keras.Sequential(layers)
def _build_actor_net(self, name_):
    """Build the policy network from the ``actor_*`` model-config entries.

    Args:
        name_: Prefix used when naming the sub-modules.

    Returns:
        An ``nn.Sequential`` of ``SlimFC`` layers. The linear output layer
        has ``action_space.n`` units for discrete actions and
        ``2 * action_dim`` units otherwise.
    """
    act_name = self.model_config["actor_hidden_activation"]
    hidden_sizes = self.model_config["actor_hiddens"]

    actor_net = nn.Sequential()
    hidden_act = get_activation_fn(act_name, framework="torch")

    fan_in = self.obs_ins
    for idx, width in enumerate(hidden_sizes):
        hidden = SlimFC(
            fan_in,
            width,
            initializer=torch.nn.init.xavier_uniform_,
            activation_fn=hidden_act,
        )
        actor_net.add_module(f"{name_}_hidden_{idx}", hidden)
        fan_in = width

    if self._is_action_discrete:
        n_act_out = self.action_space.n
    else:
        # Continuous case: the output also includes log_std.
        n_act_out = 2 * self.action_dim

    head = SlimFC(
        fan_in,
        n_act_out,
        initializer=torch.nn.init.xavier_uniform_,
        activation_fn=None,
    )
    actor_net.add_module(f"{name_}_out", head)
    return actor_net
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Build a conv trunk with separate flattened policy/value branches.

    All but the last `conv_filters` entry form a shared "same"-padded
    Conv2D trunk. The last entry is instantiated twice ("valid" padding,
    followed by ReLU and Flatten), once per branch. The resulting Keras
    model is stored in ``self.base_model`` and its summary is printed.

    Args:
        obs_space: Observation space; its shape is the model input shape.
        action_space: Forwarded to the parent constructor.
        num_outputs: Forwarded to the parent constructor.
        model_config: Model config. If "conv_filters" is missing or
            empty, it is filled in place via ``get_filter_config``.
        name: Forwarded to the parent constructor.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    super(CustomVisionNetwork, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    conv_act = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    conv_specs = self.model_config["conv_filters"]
    assert len(conv_specs) > 0,\
        "Must provide at least 1 entry in `conv_filters`!"

    self.data_format = "channels_last"
    inputs = tf.keras.layers.Input(
        shape=obs_space.shape, name="observations")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Shared trunk: every filter spec except the final one.
    trunk = inputs
    for idx, (out_size, kernel, stride) in enumerate(conv_specs[:-1], 1):
        trunk = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            padding="same",
            activation=conv_act,
            data_format="channels_last",
            name="conv{}".format(idx))(trunk)

    out_size, kernel, stride = conv_specs[-1]

    def _branch(layer_name):
        # Final conv ("valid" padding) followed by a ReLU.
        conv = tf.keras.layers.Conv2D(
            filters=out_size,
            kernel_size=kernel,
            strides=(stride, stride),
            padding="valid",
            data_format="channels_last",
            name=layer_name)(trunk)
        return tf.keras.layers.ReLU()(conv)

    p_layer = _branch("conv{}".format(len(conv_specs)))
    v_layer = _branch("conv{}".format(len(conv_specs) + 1))

    p_layer = tf.keras.layers.Flatten(data_format="channels_last")(p_layer)
    v_layer = tf.keras.layers.Flatten(data_format="channels_last")(v_layer)
    self.last_layer_is_flattened = True

    self.num_outputs_p = p_layer.shape[1]
    self.num_outputs_v = v_layer.shape[1]
    self._value_out = v_layer

    self.base_model = tf.keras.Model(inputs, [p_layer, self._value_out])
    self.base_model.summary()
def __init__(self,
             obs_space: Space,
             action_space: Space,
             num_outputs: int,
             model_config: Dict[str, Any],
             name: str,
             num_frames: int = 1) -> None:
    """Build a Keras policy/value model with optional frame stacking.

    With ``num_frames > 1`` the model takes a second "stack" input of
    shape ``(num_frames, stack_size)``. For ``num_frames >= 16`` that
    stack goes through two Conv1D/AveragePooling1D stages and a Dense
    layer before being concatenated with the current observation;
    otherwise it is flattened and concatenated directly.

    Args:
        obs_space: Observation space. Its ``original_space`` must be a
            dict-like space with a "sensors" field.
        action_space: Action space; ``shape[0]`` contributes to the
            stack width.
        num_outputs: Number of units of the policy output layer.
        model_config: Model config. Reads "fcnet_activation",
            "no_final_linear", "fcnet_hiddens" and "vf_share_layers".
        name: Forwarded to the parent constructor.
        num_frames: Number of previous steps to stack.

    Raises:
        ValueError: If the original observation space has no "sensors"
            field.
    """
    # Call base initializer first
    super().__init__(obs_space, action_space, None, model_config, name)

    # Backup some user arguments
    self.num_frames = num_frames
    self.num_outputs = num_outputs

    # Locate the "sensors" slice within the flattened observation.
    sensor_space_start = 0
    for field, space in obs_space.original_space.spaces.items():
        if field != "sensors":
            sensor_space_start += flatdim(space)
        else:
            sensor_space_size = flatdim(space)
            sensor_space_end = sensor_space_start + sensor_space_size
            break
    else:
        # Without this, the code below fails on an unbound local name.
        raise ValueError(
            "The original observation space must have a 'sensors' field.")
    self.sensor_space_range = [sensor_space_start, sensor_space_end]

    # Extract some user arguments
    activation = get_activation_fn(model_config.get("fcnet_activation"))
    no_final_linear = model_config.get("no_final_linear")
    hiddens = model_config.get("fcnet_hiddens", [])
    vf_share_layers = model_config.get("vf_share_layers")

    # Specify the inputs
    if self.num_frames > 1:
        self.view_requirements["prev_n_obs"] = ViewRequirement(
            data_col=SampleBatch.OBS,
            shift="-{}:-1".format(num_frames),
            space=obs_space)
        self.view_requirements["prev_n_act"] = ViewRequirement(
            data_col=SampleBatch.ACTIONS,
            shift="-{}:-1".format(num_frames),
            space=action_space)
        self.view_requirements["prev_n_rew"] = ViewRequirement(
            data_col=SampleBatch.REWARDS,
            shift="-{}:-1".format(num_frames))

    # Buffer to store last computed value
    self._last_value = None

    # Define the input layer of the model
    stack_size = sensor_space_size + action_space.shape[0] + 1
    obs = tf.keras.layers.Input(shape=obs_space.shape, name="obs")
    if self.num_frames > 1:
        stack = tf.keras.layers.Input(
            shape=(self.num_frames, stack_size), name="stack")
        inputs = [obs, stack]
    else:
        inputs = obs

    # Build features extraction network
    if self.num_frames >= 16:
        conv_1 = tf.keras.layers.Conv1D(
            filters=4,
            kernel_size=5,
            strides=1,
            activation="tanh",
            padding="valid",
            name="conv_1")(stack)
        pool_1 = tf.keras.layers.AveragePooling1D(
            pool_size=2,
            strides=2,
            padding="valid",
            name="pool_1")(conv_1)
        conv_2 = tf.keras.layers.Conv1D(
            filters=8,
            kernel_size=5,
            strides=1,
            activation="tanh",
            padding="valid",
            name="conv_2")(pool_1)
        pool_2 = tf.keras.layers.AveragePooling1D(
            pool_size=2,
            strides=2,
            padding="valid",
            name="pool_2")(conv_2)

        # Gather observation and extracted features as input
        flatten = tf.keras.layers.Flatten(name="flatten")(pool_2)
        features = tf.keras.layers.Dense(
            units=8,
            name="fc_features",
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(flatten)
        concat = tf.keras.layers.Concatenate(
            axis=-1, name="concat")([obs, features])
    elif self.num_frames > 1:
        # Gather current observation and previous stack as input
        features = tf.keras.layers.Flatten(name="flatten")(stack)
        concat = tf.keras.layers.Concatenate(
            axis=-1, name="concat")([obs, features])
    else:
        # Current observation is the only input
        concat = obs

    # Create policy layers 0 to second-last.
    i = 1
    last_layer = concat
    for size in hiddens[:-1]:
        last_layer = tf.keras.layers.Dense(
            units=size,
            name="fc_{}".format(i),
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
        i += 1

    # The last layer is adjusted to be of size num_outputs, but it is a
    # layer with activation.
    if no_final_linear:
        logits_out = tf.keras.layers.Dense(
            units=num_outputs,
            name="fc_out",
            activation=activation,
            kernel_initializer=normc_initializer(0.01))(last_layer)
    # Finish the layers with the provided sizes (`hiddens`), plus a last
    # linear layer of size num_outputs.
    else:
        # `fcnet_hiddens` defaults to an empty list: only add the last
        # hidden layer if there is one.
        if hiddens:
            last_layer = tf.keras.layers.Dense(
                units=hiddens[-1],
                name="fc_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
        logits_out = tf.keras.layers.Dense(
            units=num_outputs,
            name="fc_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)

    last_vf_layer = None
    if not vf_share_layers:
        # Build a dedicated hidden layers for the value net if requested
        i = 1
        last_vf_layer = concat
        for size in hiddens:
            last_vf_layer = tf.keras.layers.Dense(
                units=size,
                name="fc_value_{}".format(i),
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_vf_layer)
            i += 1

    # Compare against None explicitly: `a or b` would ask a symbolic
    # tensor for its truth value.
    value_in = last_vf_layer if last_vf_layer is not None else last_layer
    value_out = tf.keras.layers.Dense(
        units=1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(1.0))(value_in)

    # Finish definition of the model
    self.base_model = tf.keras.Model(inputs, [logits_out, value_out])
def __init__(
    self,
    input_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    num_outputs: Optional[int] = None,
    *,
    name: str = "",
    conv_filters: Optional[Sequence[Sequence[int]]] = None,
    conv_activation: Optional[str] = None,
    post_fcnet_hiddens: Optional[Sequence[int]] = (),
    post_fcnet_activation: Optional[str] = None,
    no_final_linear: bool = False,
    vf_share_layers: bool = False,
    free_log_std: bool = False,
    **kwargs,
):
    """Build a Keras conv model with policy and value outputs.

    Args:
        input_space: Observation space; its shape is the input shape.
        action_space: Not used by this constructor.
        num_outputs: Size of the policy output. If falsy, the conv output
            is flattened instead.
        name: Name passed to the parent constructor.
        conv_filters: ``(out_size, kernel, stride)`` per Conv2D layer.
            Falls back to ``get_filter_config(input_space.shape)``.
        conv_activation: Activation specifier for the Conv2D layers.
        post_fcnet_hiddens: Sizes of dense layers appended after the
            conv stack. Any sequence (or None) is accepted.
        post_fcnet_activation: Activation specifier for those layers.
        no_final_linear: If True (and `num_outputs` is set), the last
            layer keeps its activation and has `num_outputs` nodes.
        vf_share_layers: If True, the value head is a Dense layer on top
            of the policy features; otherwise a parallel conv stack is
            built for the value function.
        free_log_std: Not used by this constructor.
        **kwargs: Ignored.

    Raises:
        ValueError: If `num_outputs` is given (without `no_final_linear`)
            and the conv stack does not reduce dims 1 and 2 to size 1.
    """
    super().__init__(name=name)

    if not conv_filters:
        conv_filters = get_filter_config(input_space.shape)
    assert len(conv_filters) > 0,\
        "Must provide at least 1 entry in `conv_filters`!"

    # Normalize to a list: the sizes are concatenated with lists below,
    # which fails for the default `()` (tuple + list is a TypeError).
    post_fcnet_hiddens = list(post_fcnet_hiddens or ())

    conv_activation = get_activation_fn(conv_activation, framework="tf")
    post_fcnet_activation = get_activation_fn(
        post_fcnet_activation, framework="tf")

    def _strides(stride):
        # Accept both an int and an explicit (h, w) pair.
        if isinstance(stride, (list, tuple)):
            return stride
        return (stride, stride)

    input_shape = input_space.shape
    self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers
    for i, (out_size, kernel, stride) in enumerate(conv_filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=_strides(stride),
            activation=conv_activation,
            padding="same",
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)

    out_size, kernel, stride = conv_filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        last_layer = tf.keras.layers.Conv2D(
            out_size if post_fcnet_hiddens else num_outputs,
            kernel,
            strides=_strides(stride),
            activation=conv_activation,
            padding="valid",
            data_format="channels_last",
            name="conv_out")(last_layer)
        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else [])
        for i, out_size in enumerate(layer_sizes):
            last_layer = tf.keras.layers.Dense(
                out_size,
                name="post_fcnet_{}".format(i),
                activation=post_fcnet_activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)

    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=_strides(stride),
            activation=conv_activation,
            padding="valid",
            data_format="channels_last",
            name="conv{}".format(len(conv_filters)))(last_layer)

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            if post_fcnet_hiddens:
                last_cnn = last_layer = tf.keras.layers.Conv2D(
                    post_fcnet_hiddens[0], [1, 1],
                    activation=post_fcnet_activation,
                    padding="same",
                    data_format="channels_last",
                    name="conv_out")(last_layer)
                # Add (optional) post-fc-stack after last Conv2D layer.
                for i, out_size in enumerate(post_fcnet_hiddens[1:] +
                                             [num_outputs]):
                    last_layer = tf.keras.layers.Dense(
                        out_size,
                        name="post_fcnet_{}".format(i + 1),
                        activation=post_fcnet_activation
                        if i < len(post_fcnet_hiddens) - 1 else None,
                        kernel_initializer=normc_initializer(1.0))(
                            last_layer)
            else:
                last_cnn = last_layer = tf.keras.layers.Conv2D(
                    num_outputs, [1, 1],
                    activation=None,
                    padding="same",
                    data_format="channels_last",
                    name="conv_out")(last_layer)

            if last_cnn.shape[1] != 1 or last_cnn.shape[2] != 1:
                # Report the local `conv_filters`: this constructor never
                # sets a `model_config` attribute.
                raise ValueError(
                    "Given `conv_filters` ({}) do not result in a [B, 1, "
                    "1, {} (`num_outputs`)] shape (but in {})! Please "
                    "adjust your Conv2D stack such that the dims 1 and 2 "
                    "are both 1.".format(conv_filters, num_outputs,
                                         list(last_cnn.shape)))

        # num_outputs not known -> Flatten.
        else:
            self.last_layer_is_flattened = True
            last_layer = tf.keras.layers.Flatten(
                data_format="channels_last")(last_layer)

            # Add (optional) post-fc-stack after last Conv2D layer.
            for i, out_size in enumerate(post_fcnet_hiddens):
                last_layer = tf.keras.layers.Dense(
                    out_size,
                    name="post_fcnet_{}".format(i),
                    activation=post_fcnet_activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)
    logits_out = last_layer

    # Build the value layers
    if vf_share_layers:
        if not self.last_layer_is_flattened:
            last_layer = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)
    else:
        # build a parallel set of hidden layers for the value net
        last_layer = inputs
        for i, (out_size, kernel,
                stride) in enumerate(conv_filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=_strides(stride),
                activation=conv_activation,
                padding="same",
                data_format="channels_last",
                name="conv_value_{}".format(i))(last_layer)
        out_size, kernel, stride = conv_filters[-1]
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=_strides(stride),
            activation=conv_activation,
            padding="valid",
            data_format="channels_last",
            name="conv_value_{}".format(len(conv_filters)))(last_layer)
        last_layer = tf.keras.layers.Conv2D(
            1, [1, 1],
            activation=None,
            padding="same",
            data_format="channels_last",
            name="conv_value_out")(last_layer)
        value_out = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

    self.base_model = tf.keras.Model(inputs, [logits_out, value_out])
def __init__(
        self,
        obs_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        num_outputs: int,
        model_config: ModelConfigDict,
        name: str,
        # Extra DDPGActionModel args:
        actor_hiddens: List[int] = [256, 256],
        actor_hidden_activation: str = "relu",
        critic_hiddens: List[int] = [256, 256],
        critic_hidden_activation: str = "relu",
        twin_q: bool = False,
        add_layer_norm: bool = False):
    """Initialize variables of this model.

    Extra model kwargs:
        actor_hidden_activation (str): activation for actor network
        actor_hiddens (list): hidden layers sizes for actor network
        critic_hidden_activation (str): activation for critic network
        critic_hiddens (list): hidden layers sizes for critic network
        twin_q (bool): build twin Q networks.
        add_layer_norm (bool): Enable layer norm (for param noise).

    Note that the core layers for forward() are not defined here, this
    only defines the layers for the output heads. Those layers for
    forward() should be defined in subclasses of DDPGTorchModel.

    The list defaults are only read, never mutated, so sharing them
    across calls is harmless here.
    """
    nn.Module.__init__(self)
    super(DDPGTorchModel, self).__init__(obs_space, action_space,
                                         num_outputs, model_config, name)

    self.bounded = np.logical_and(self.action_space.bounded_above,
                                  self.action_space.bounded_below).any()
    # `np.product` was removed in NumPy 2.0; `np.prod` is the same
    # reduction.
    self.action_dim = np.prod(self.action_space.shape)

    # Build the policy network.
    self.policy_model = nn.Sequential()
    ins = num_outputs
    self.obs_ins = ins
    activation = get_activation_fn(
        actor_hidden_activation, framework="torch")
    for i, n in enumerate(actor_hiddens):
        self.policy_model.add_module(
            "action_{}".format(i),
            SlimFC(
                ins,
                n,
                initializer=torch.nn.init.xavier_uniform_,
                activation_fn=activation))
        # Add LayerNorm after each Dense.
        if add_layer_norm:
            self.policy_model.add_module("LayerNorm_A_{}".format(i),
                                         nn.LayerNorm(n))
        ins = n

    self.policy_model.add_module(
        "action_out",
        SlimFC(
            ins,
            self.action_dim,
            initializer=torch.nn.init.xavier_uniform_,
            activation_fn=None))

    # Use sigmoid to scale to [0,1], but also double magnitude of input to
    # emulate behaviour of tanh activation used in DDPG and TD3 papers.
    # After sigmoid squashing, re-scale to env action space bounds.
    class _Lambda(nn.Module):
        def __init__(self_):
            super().__init__()
            # Registered as non-trainable parameters.
            low_action = nn.Parameter(
                torch.from_numpy(self.action_space.low).float())
            low_action.requires_grad = False
            self_.register_parameter("low_action", low_action)
            action_range = nn.Parameter(
                torch.from_numpy(self.action_space.high -
                                 self.action_space.low).float())
            action_range.requires_grad = False
            self_.register_parameter("action_range", action_range)

        def forward(self_, x):
            sigmoid_out = nn.Sigmoid()(2.0 * x)
            squashed = self_.action_range * sigmoid_out + self_.low_action
            return squashed

    # Only squash if we have bounded actions.
    if self.bounded:
        self.policy_model.add_module("action_out_squashed", _Lambda())

    # Build the Q-net(s), including target Q-net(s).
    def build_q_net(name_):
        activation = get_activation_fn(
            critic_hidden_activation, framework="torch")
        # Feed obs features and actions (concatenated) through the NN.
        q_net = nn.Sequential()
        ins = self.obs_ins + self.action_dim
        for i, n in enumerate(critic_hiddens):
            q_net.add_module(
                "{}_hidden_{}".format(name_, i),
                SlimFC(
                    ins,
                    n,
                    initializer=torch.nn.init.xavier_uniform_,
                    activation_fn=activation))
            ins = n

        q_net.add_module(
            "{}_out".format(name_),
            SlimFC(
                ins,
                1,
                initializer=torch.nn.init.xavier_uniform_,
                activation_fn=None))
        return q_net

    self.q_model = build_q_net("q")
    if twin_q:
        self.twin_q_model = build_q_net("twin_q")
    else:
        self.twin_q_model = None
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Build a Keras conv model with policy and value outputs.

    Args:
        obs_space: Observation space; its shape is the input shape.
        action_space: Forwarded to the parent constructor.
        num_outputs: Size of the policy output. If falsy, the conv output
            is flattened and ``self.num_outputs`` is set to its width.
        model_config: Model config. Reads "conv_filters" (filled in place
            via ``get_filter_config`` if missing or empty),
            "conv_activation", "no_final_linear" and "vf_share_layers".
        name: Forwarded to the parent constructor.

    Raises:
        ValueError: If `num_outputs` is given (without "no_final_linear")
            and the conv stack does not reduce dims 1 and 2 to size 1.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    super(VisionNetwork, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0,\
        "Must provide at least 1 entry in `conv_filters`!"
    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    inputs = tf.keras.layers.Input(
        shape=obs_space.shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="same",
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)

    out_size, kernel, stride = filters[-1]

    # No final linear: Last layer is a Conv2D and uses num_outputs.
    if no_final_linear and num_outputs:
        last_layer = tf.keras.layers.Conv2D(
            num_outputs,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_out")(last_layer)
        conv_out = last_layer
    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv{}".format(len(filters)))(last_layer)

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            conv_out = tf.keras.layers.Conv2D(
                num_outputs, [1, 1],
                activation=None,
                padding="same",
                data_format="channels_last",
                name="conv_out")(last_layer)

            if conv_out.shape[1] != 1 or conv_out.shape[2] != 1:
                raise ValueError(
                    "Given `conv_filters` ({}) do not result in a [B, 1, "
                    "1, {} (`num_outputs`)] shape (but in {})! Please "
                    "adjust your Conv2D stack such that the dims 1 and 2 "
                    "are both 1.".format(self.model_config["conv_filters"],
                                         self.num_outputs,
                                         list(conv_out.shape)))

        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            conv_out = tf.keras.layers.Flatten(
                data_format="channels_last")(last_layer)
            self.num_outputs = conv_out.shape[1]

    # Build the value layers
    if vf_share_layers:
        if self.last_layer_is_flattened:
            # On the flattened path, dims 1 and 2 of the conv output are
            # not checked to be 1, so squeezing them may fail. Use the
            # already flattened features instead.
            last_layer = conv_out
        else:
            last_layer = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)
    else:
        # build a parallel set of hidden layers for the value net
        last_layer = inputs
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="same",
                data_format="channels_last",
                name="conv_value_{}".format(i))(last_layer)
        out_size, kernel, stride = filters[-1]
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_value_{}".format(len(filters)))(last_layer)
        last_layer = tf.keras.layers.Conv2D(
            1, [1, 1],
            activation=None,
            padding="same",
            data_format="channels_last",
            name="conv_value_out")(last_layer)
        value_out = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

    self.base_model = tf.keras.Model(inputs, [conv_out, value_out])
    self.register_variables(self.base_model.variables)
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Builds a fully connected Keras model with policy and value outputs.

    Args:
        obs_space (gym.spaces.Space): Observation space; the product of
            its `shape` is the (flat) input size.
        action_space (gym.spaces.Space): Action space (forwarded to the
            parent constructor).
        num_outputs (int): Size of the policy output. If falsy, no final
            layer is added and `self.num_outputs` is set to the size of
            the last hidden layer (or the flat obs size w/o hiddens).
        model_config (ModelConfigDict): Model config; reads
            `fcnet_hiddens`, `post_fcnet_hiddens`, `fcnet_activation`,
            `post_fcnet_activation`, `no_final_linear`,
            `vf_share_layers` and `free_log_std`.
        name (str): Model name (forwarded to the parent constructor).
    """
    super(FullyConnectedNetwork, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    # Coerce to lists so that tuple-valued configs can be concatenated.
    hiddens = list(model_config.get("fcnet_hiddens") or []) + \
        list(model_config.get("post_fcnet_hiddens") or [])
    activation = model_config.get("fcnet_activation")
    if not model_config.get("fcnet_hiddens", []):
        activation = model_config.get("post_fcnet_activation")
    activation = get_activation_fn(activation)
    no_final_linear = model_config.get("no_final_linear")
    vf_share_layers = model_config.get("vf_share_layers")
    free_log_std = model_config.get("free_log_std")

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two", num_outputs)
        num_outputs = num_outputs // 2
        self.log_std_var = tf.Variable(
            [0.0] * num_outputs, dtype=tf.float32, name="log_std")

    # We are using obs_flat, so take the flattened shape as input.
    # (`np.prod` replaces `np.product`, which NumPy 2.0 removed.)
    flat_obs_size = int(np.prod(obs_space.shape))
    inputs = tf.keras.layers.Input(
        shape=(flat_obs_size, ), name="observations")
    # Last hidden layer output (before logits outputs).
    last_layer = inputs
    # The action distribution outputs.
    logits_out = None
    i = 1

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        last_layer = tf.keras.layers.Dense(
            size,
            name=f"fc_{i}",
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
        i += 1

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        logits_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            last_layer = tf.keras.layers.Dense(
                hiddens[-1],
                name=f"fc_{i}",
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
        if num_outputs:
            logits_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01))(last_layer)
        # Adjust num_outputs to be the number of nodes in the last layer.
        else:
            self.num_outputs = hiddens[-1] if hiddens else flat_obs_size

    # Concat the log std vars to the end of the state-dependent means.
    if free_log_std and logits_out is not None:

        def tiled_log_std(x):
            # Repeat the single log-std row once per batch item.
            return tf.tile(
                tf.expand_dims(self.log_std_var, 0), [tf.shape(x)[0], 1])

        log_std_out = tf.keras.layers.Lambda(tiled_log_std)(inputs)
        logits_out = tf.keras.layers.Concatenate(axis=1)(
            [logits_out, log_std_out])

    last_vf_layer = None
    if not vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        last_vf_layer = inputs
        i = 1
        for size in hiddens:
            last_vf_layer = tf.keras.layers.Dense(
                size,
                name=f"fc_value_{i}",
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_vf_layer)
            i += 1

    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(
            last_vf_layer if last_vf_layer is not None else last_layer)

    self.base_model = tf.keras.Model(
        inputs,
        [(logits_out if logits_out is not None else last_layer), value_out])
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Builds the Keras Conv2D (+ optional post-FC) policy/value model.

    Args:
        obs_space (gym.spaces.Space): Observation space; its `shape`
            (plus a trailing framestack dim, if framestacking is on) is
            the Keras input shape.
        action_space (gym.spaces.Space): Action space (forwarded to the
            parent constructor).
        num_outputs (int): Size of the policy output. If falsy, the conv
            output is flattened and `self.num_outputs` is set to the size
            of the last resulting layer.
        model_config (ModelConfigDict): Model config. If `conv_filters`
            is missing/empty, it is filled in-place from
            `get_filter_config(obs_space.shape)`.
        name (str): Model name (forwarded to the parent constructor).

    Raises:
        ValueError: If `num_outputs` is given (w/o `no_final_linear`) and
            the conv stack does not reduce dims 1 and 2 to size 1.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    super(VisionNetwork, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0,\
        "Must provide at least 1 entry in `conv_filters`!"

    # Post FC net config. Coerce to a list so that tuple-valued configs
    # can be concatenated with `[num_outputs]` below.
    post_fcnet_hiddens = list(model_config.get("post_fcnet_hiddens") or [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="tf")

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    def _strides(stride_):
        # Accept either a ready-made (h, w) pair or a single int.
        if isinstance(stride_, (list, tuple)):
            return stride_
        return (stride_, stride_)

    self.traj_view_framestacking = False

    # Perform Atari framestacking via traj. view API.
    # `or 0` guards against an explicit `num_framestacks: None`.
    num_framestacks = model_config.get("num_framestacks")
    if num_framestacks != "auto" and (num_framestacks or 0) > 1:
        input_shape = obs_space.shape + (model_config["num_framestacks"], )
        self.data_format = "channels_first"
        self.traj_view_framestacking = True
    else:
        input_shape = obs_space.shape
        self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=_strides(stride),
            activation=activation,
            padding="same",
            data_format="channels_last",
            name=f"conv{i}")(last_layer)

    out_size, kernel, stride = filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        last_layer = tf.keras.layers.Conv2D(
            out_size if post_fcnet_hiddens else num_outputs,
            kernel,
            strides=_strides(stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_out")(last_layer)
        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else [])
        for i, out_size in enumerate(layer_sizes):
            last_layer = tf.keras.layers.Dense(
                out_size,
                name=f"post_fcnet_{i}",
                activation=post_fcnet_activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)

    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=_strides(stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name=f"conv{len(filters)}")(last_layer)

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            if post_fcnet_hiddens:
                # First post-FC layer is realized as a (1,1)-Conv2D.
                last_cnn = last_layer = tf.keras.layers.Conv2D(
                    post_fcnet_hiddens[0], [1, 1],
                    activation=post_fcnet_activation,
                    padding="same",
                    data_format="channels_last",
                    name="conv_out")(last_layer)
                # Add (optional) post-fc-stack after last Conv2D layer.
                # Only the very last (num_outputs) layer is linear.
                post_sizes = post_fcnet_hiddens[1:] + [num_outputs]
                for i, out_size in enumerate(post_sizes):
                    last_layer = tf.keras.layers.Dense(
                        out_size,
                        name=f"post_fcnet_{i + 1}",
                        activation=post_fcnet_activation
                        if i < len(post_fcnet_hiddens) - 1 else None,
                        kernel_initializer=normc_initializer(1.0))(
                            last_layer)
            else:
                last_cnn = last_layer = tf.keras.layers.Conv2D(
                    num_outputs, [1, 1],
                    activation=None,
                    padding="same",
                    data_format="channels_last",
                    name="conv_out")(last_layer)

            if last_cnn.shape[1] != 1 or last_cnn.shape[2] != 1:
                raise ValueError(
                    "Given `conv_filters` ({}) do not result in a [B, 1, "
                    "1, {} (`num_outputs`)] shape (but in {})! Please "
                    "adjust your Conv2D stack such that the dims 1 and 2 "
                    "are both 1.".format(self.model_config["conv_filters"],
                                         self.num_outputs,
                                         list(last_cnn.shape)))

        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            last_layer = tf.keras.layers.Flatten(
                data_format="channels_last")(last_layer)

            # Add (optional) post-fc-stack after last Conv2D layer.
            for i, out_size in enumerate(post_fcnet_hiddens):
                last_layer = tf.keras.layers.Dense(
                    out_size,
                    name=f"post_fcnet_{i}",
                    activation=post_fcnet_activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)
            self.num_outputs = last_layer.shape[1]
    logits_out = last_layer

    # Build the value layers
    if vf_share_layers:
        # A flattened last layer has no dims 1 and 2 left to squeeze.
        if not self.last_layer_is_flattened:
            last_layer = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)
    else:
        # build a parallel set of hidden layers for the value net
        last_layer = inputs
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=_strides(stride),
                activation=activation,
                padding="same",
                data_format="channels_last",
                name=f"conv_value_{i}")(last_layer)
        out_size, kernel, stride = filters[-1]
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=_strides(stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name=f"conv_value_{len(filters)}")(last_layer)
        last_layer = tf.keras.layers.Conv2D(
            1, [1, 1],
            activation=None,
            padding="same",
            data_format="channels_last",
            name="conv_value_out")(last_layer)
        value_out = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

    self.base_model = tf.keras.Model(inputs, [logits_out, value_out])

    # Optional: framestacking obs/new_obs for Atari.
    if self.traj_view_framestacking:
        from_ = model_config["num_framestacks"] - 1
        obs_view = self.view_requirements[SampleBatch.OBS]
        obs_view.shift = f"-{from_}:0"
        obs_view.shift_from = -from_
        obs_view.shift_to = 0
        self.view_requirements[SampleBatch.NEXT_OBS] = ViewRequirement(
            data_col=SampleBatch.OBS,
            shift=f"-{from_ - 1}:1",
            space=self.view_requirements[SampleBatch.OBS].space,
            used_for_compute_actions=False,
        )
def __init__(
        self,
        input_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        num_outputs: Optional[int] = None,
        *,
        name: str = "",
        fcnet_hiddens: Optional[Sequence[int]] = (),
        fcnet_activation: Optional[str] = None,
        post_fcnet_hiddens: Optional[Sequence[int]] = (),
        post_fcnet_activation: Optional[str] = None,
        no_final_linear: bool = False,
        vf_share_layers: bool = False,
        free_log_std: bool = False,
        **kwargs,
):
    """Builds a fully connected Keras model with policy and value outputs.

    Args:
        input_space (gym.spaces.Space): Input space; the product of its
            `shape` is the (flat) input size.
        action_space (gym.spaces.Space): Action space (not used here).
        num_outputs (Optional[int]): Size of the policy output. If falsy,
            the last hidden layer's output is used as first model output.
        name (str): Name passed to the parent constructor.
        fcnet_hiddens (Optional[Sequence[int]]): Hidden layer sizes.
        fcnet_activation (Optional[str]): Activation for all hidden
            layers (if `fcnet_hiddens` is non-empty).
        post_fcnet_hiddens (Optional[Sequence[int]]): Additional hidden
            layer sizes, appended to `fcnet_hiddens`.
        post_fcnet_activation (Optional[str]): Activation used instead of
            `fcnet_activation` if `fcnet_hiddens` is empty.
        no_final_linear (bool): If True (and `num_outputs` is set), the
            last layer has size `num_outputs` and uses the activation.
        vf_share_layers (bool): If False, build a separate hidden stack
            for the value output.
        free_log_std (bool): If True, half of the outputs are a
            state-independent `log_std` variable; `num_outputs` must then
            be divisible by two.
        **kwargs: Ignored.
    """
    super().__init__(name=name)

    hiddens = list(fcnet_hiddens or ()) + list(post_fcnet_hiddens or ())
    activation = fcnet_activation
    if not fcnet_hiddens:
        activation = post_fcnet_activation
    activation = get_activation_fn(activation)

    # Generate free-floating bias variables for the second half of
    # the outputs.
    if free_log_std:
        assert num_outputs % 2 == 0, (
            "num_outputs must be divisible by two",
            num_outputs,
        )
        num_outputs = num_outputs // 2
        self.log_std_var = tf.Variable(
            [0.0] * num_outputs, dtype=tf.float32, name="log_std")

    # We are using obs_flat, so take the flattened shape as input.
    # (`np.prod` replaces `np.product`, which NumPy 2.0 removed.)
    inputs = tf.keras.layers.Input(
        shape=(int(np.prod(input_space.shape)), ), name="observations")
    # Last hidden layer output (before logits outputs).
    last_layer = inputs
    # The action distribution outputs.
    logits_out = None
    i = 1

    # Create layers 0 to second-last.
    for size in hiddens[:-1]:
        last_layer = tf.keras.layers.Dense(
            size,
            name=f"fc_{i}",
            activation=activation,
            kernel_initializer=normc_initializer(1.0),
        )(last_layer)
        i += 1

    # The last layer is adjusted to be of size num_outputs, but it's a
    # layer with activation.
    if no_final_linear and num_outputs:
        logits_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=activation,
            kernel_initializer=normc_initializer(1.0),
        )(last_layer)
    # Finish the layers with the provided sizes (`hiddens`), plus -
    # iff num_outputs > 0 - a last linear layer of size num_outputs.
    else:
        if len(hiddens) > 0:
            last_layer = tf.keras.layers.Dense(
                hiddens[-1],
                name=f"fc_{i}",
                activation=activation,
                kernel_initializer=normc_initializer(1.0),
            )(last_layer)
        if num_outputs:
            logits_out = tf.keras.layers.Dense(
                num_outputs,
                name="fc_out",
                activation=None,
                kernel_initializer=normc_initializer(0.01),
            )(last_layer)

    # Concat the log std vars to the end of the state-dependent means.
    if free_log_std and logits_out is not None:

        def tiled_log_std(x):
            # Repeat the single log-std row once per batch item.
            return tf.tile(
                tf.expand_dims(self.log_std_var, 0), [tf.shape(x)[0], 1])

        log_std_out = tf.keras.layers.Lambda(tiled_log_std)(inputs)
        logits_out = tf.keras.layers.Concatenate(axis=1)(
            [logits_out, log_std_out])

    last_vf_layer = None
    if not vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        last_vf_layer = inputs
        i = 1
        for size in hiddens:
            last_vf_layer = tf.keras.layers.Dense(
                size,
                name=f"fc_value_{i}",
                activation=activation,
                kernel_initializer=normc_initializer(1.0),
            )(last_vf_layer)
            i += 1

    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01),
    )(last_vf_layer if last_vf_layer is not None else last_layer)

    self.base_model = tf.keras.Model(
        inputs,
        [(logits_out if logits_out is not None else last_layer), value_out])
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Builds a residual Conv2D trunk with separate policy/value heads.

    The first entry of `conv_filters` becomes a plain Conv2D + ReLU; each
    further entry (except the last) becomes a two-conv residual block. The
    last entry parameterizes two "valid"-padded head convs (policy with
    `out_size` filters, value with 1 filter), both followed by ReLU and
    Flatten.

    Every conv is followed by a hard-coded ReLU; the `conv_activation`,
    `post_fcnet_*`, `no_final_linear` and `vf_share_layers` settings are
    not used by this constructor.

    Args:
        obs_space (gym.spaces.Space): Observation space; its `shape`
            (plus a trailing framestack dim, if framestacking is on) is
            the Keras input shape.
        action_space (gym.spaces.Space): Action space (forwarded to the
            parent constructor).
        num_outputs (int): Forwarded to the parent constructor.
        model_config (ModelConfigDict): Model config. If `conv_filters`
            is missing/empty, it is filled in-place from
            `get_filter_config(obs_space.shape)`.
        name (str): Model name (forwarded to the parent constructor).
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    super(CustomVisionNetwork, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    filters = self.model_config["conv_filters"]
    assert len(filters) > 0,\
        "Must provide at least 1 entry in `conv_filters`!"

    self.traj_view_framestacking = False

    # Perform Atari framestacking via traj. view API.
    if model_config.get("num_framestacks") != "auto" and \
            model_config.get("num_framestacks", 0) > 1:
        input_shape = obs_space.shape + (model_config["num_framestacks"], )
        self.data_format = "channels_first"
        self.traj_view_framestacking = True
    else:
        input_shape = obs_space.shape
        self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the shared trunk.
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        if i == 1:
            # Stem: single conv + ReLU.
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                data_format="channels_last",
                name=f"conv{i}")(last_layer)
            last_layer = tf.keras.layers.ReLU()(last_layer)
        else:
            # Residual block: conv -> ReLU -> conv -> add skip -> ReLU.
            # NOTE(review): `Add` presumably requires this block to keep
            # the input's shape (same channels, stride 1) - confirm
            # against the filter configs in use.
            input_layer = last_layer
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                data_format="channels_last",
                name=f"conv{i * 2 - 2}")(last_layer)
            last_layer = tf.keras.layers.ReLU()(last_layer)
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                data_format="channels_last",
                name=f"conv{i * 2 - 1}")(last_layer)
            last_layer = tf.keras.layers.Add()([input_layer, last_layer])
            last_layer = tf.keras.layers.ReLU()(last_layer)

    out_size, kernel, stride = filters[-1]

    # Policy head conv (`out_size` filters).
    p_layer = tf.keras.layers.Conv2D(
        filters=out_size,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name=f"conv{2 * len(filters)}")(last_layer)
    p_layer = tf.keras.layers.ReLU()(p_layer)

    # Value head conv (single filter), fed from the same trunk output.
    v_layer = tf.keras.layers.Conv2D(
        filters=1,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name=f"conv{2 * len(filters) + 1}")(last_layer)
    v_layer = tf.keras.layers.ReLU()(v_layer)

    p_layer = tf.keras.layers.Flatten(data_format="channels_last")(p_layer)
    v_layer = tf.keras.layers.Flatten(data_format="channels_last")(v_layer)
    self.last_layer_is_flattened = True

    # Sizes of the flattened policy and value head outputs.
    self.num_outputs_p = p_layer.shape[1]
    self.num_outputs_v = v_layer.shape[1]
    self._value_out = v_layer

    self.base_model = tf.keras.Model(inputs, [p_layer, self._value_out])
    # Prints the layer overview of the built model.
    self.base_model.summary()
def __init__(
    self,
    obs_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    num_outputs: int,
    model_config: ModelConfigDict,
    name: str,
):
    """Builds the torch conv stack, logits layer and value branch.

    Args:
        obs_space (gym.spaces.Space): Observation space; its `shape` is
            unpacked as `(w, h, in_channels)`.
        action_space (gym.spaces.Space): Action space (forwarded to the
            parent constructor).
        num_outputs (int): Size of the policy output. If falsy, the conv
            output is flattened; if `self.num_outputs` is None after
            that, it is determined via a dummy forward pass.
        model_config (ModelConfigDict): Model config. If `conv_filters`
            is missing/empty, it is filled in-place from
            `get_filter_config(obs_space.shape)`.
        name (str): Model name (forwarded to the parent constructor).
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    TorchModelV2.__init__(
        self, obs_space, action_space, num_outputs, model_config, name
    )
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, "Must provide at least 1 entry in `conv_filters`!"

    # Post FC net config. Coerce to a list so that tuple-valued configs
    # can be concatenated with `[num_outputs]` below.
    post_fcnet_hiddens = list(model_config.get("post_fcnet_hiddens") or [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="torch"
    )

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None

    layers = []
    (w, h, in_channels) = obs_space.shape

    in_size = [w, h]
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding(in_size, kernel, stride)
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                padding,
                activation_fn=activation,
            )
        )
        in_channels = out_channels
        in_size = out_size

    out_channels, kernel, stride = filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        out_channels = out_channels if post_fcnet_hiddens else num_outputs
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation,
            )
        )

        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else []
        )
        for out_size in layer_sizes:
            layers.append(
                SlimFC(
                    in_size=out_channels,
                    out_size=out_size,
                    activation_fn=post_fcnet_activation,
                    initializer=normc_initializer(1.0),
                )
            )
            out_channels = out_size

    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation,
            )
        )

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            in_size = [
                np.ceil((in_size[0] - kernel[0]) / stride),
                np.ceil((in_size[1] - kernel[1]) / stride),
            ]
            padding, _ = same_padding(in_size, [1, 1], [1, 1])
            if post_fcnet_hiddens:
                layers.append(nn.Flatten())
                in_size = out_channels
                # Add (optional) post-fc-stack after last Conv2D layer.
                # The list holds all hidden sizes plus `num_outputs`, so
                # every index below `len(post_fcnet_hiddens)` is a hidden
                # layer (with activation); only the final logits layer
                # stays linear.
                for i, out_size in enumerate(post_fcnet_hiddens + [num_outputs]):
                    layers.append(
                        SlimFC(
                            in_size=in_size,
                            out_size=out_size,
                            activation_fn=post_fcnet_activation
                            if i < len(post_fcnet_hiddens)
                            else None,
                            initializer=normc_initializer(1.0),
                        )
                    )
                    in_size = out_size
                # Last layer is logits layer.
                self._logits = layers.pop()

            else:
                self._logits = SlimConv2d(
                    out_channels,
                    num_outputs,
                    [1, 1],
                    1,
                    padding,
                    activation_fn=None,
                )

        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())

    self._convs = nn.Sequential(*layers)

    # If our num_outputs still unknown, we need to do a test pass to
    # figure out the output dimensions. This could be the case, if we have
    # the Flatten layer at the end.
    if self.num_outputs is None:
        # Create a B=1 dummy sample and push it through our conv-net.
        # The permute moves the last (channel) dim to the front.
        dummy_in = (
            torch.from_numpy(self.obs_space.sample())
            .permute(2, 0, 1)
            .unsqueeze(0)
            .float()
        )
        dummy_out = self._convs(dummy_in)
        self.num_outputs = dummy_out.shape[1]

    # Build the value layers
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        self._value_branch = SlimFC(
            out_channels, 1, initializer=normc_initializer(0.01), activation_fn=None
        )
    else:
        # Separate conv stack for the value function, mirroring the
        # policy conv stack and ending in a single-channel 1x1 conv.
        vf_layers = []
        (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel, stride)
            vf_layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    padding,
                    activation_fn=activation,
                )
            )
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,
                activation_fn=activation,
            )
        )

        vf_layers.append(
            SlimConv2d(
                in_channels=out_channels,
                out_channels=1,
                kernel=1,
                stride=1,
                padding=None,
                activation_fn=None,
            )
        )
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Builds the torch conv stack, logits layer and value branch.

    Args:
        obs_space (gym.spaces.Space): Observation space; its `shape` is
            unpacked as `(w, h, in_channels)`, or as `(w, h)` when
            framestacking via the trajectory view API is on.
        action_space (gym.spaces.Space): Action space (forwarded to the
            parent constructor).
        num_outputs (int): Size of the policy output. If falsy, the conv
            output is flattened and `self.num_outputs` is set to the
            number of channels of the last conv filter.
        model_config (ModelConfigDict): Model config. If `conv_filters`
            is missing/empty, it is filled in-place from
            `get_filter_config(obs_space.shape)`.
        name (str): Model name (forwarded to the parent constructor).
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0,\
        "Must provide at least 1 entry in `conv_filters`!"

    # Post FC net config. Coerce to a list so that tuple-valued configs
    # can be concatenated with `[num_outputs]` below.
    post_fcnet_hiddens = list(model_config.get("post_fcnet_hiddens") or [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="torch")

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None
    self.traj_view_framestacking = False

    layers = []
    # Perform Atari framestacking via traj. view API.
    # `or 0` guards against an explicit `num_framestacks: None`.
    num_framestacks = model_config.get("num_framestacks")
    if num_framestacks != "auto" and (num_framestacks or 0) > 1:
        (w, h) = obs_space.shape
        in_channels = model_config["num_framestacks"]
        self.traj_view_framestacking = True
    else:
        (w, h, in_channels) = obs_space.shape

    in_size = [w, h]
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding(in_size, kernel, [stride, stride])
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                padding,
                activation_fn=activation))
        in_channels = out_channels
        in_size = out_size

    out_channels, kernel, stride = filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        out_channels = out_channels if post_fcnet_hiddens else num_outputs
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))

        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else [])
        for out_size in layer_sizes:
            layers.append(
                SlimFC(
                    in_size=out_channels,
                    out_size=out_size,
                    activation_fn=post_fcnet_activation,
                    initializer=normc_initializer(1.0)))
            out_channels = out_size

    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            in_size = [
                np.ceil((in_size[0] - kernel[0]) / stride),
                np.ceil((in_size[1] - kernel[1]) / stride)
            ]
            padding, _ = same_padding(in_size, [1, 1], [1, 1])
            if post_fcnet_hiddens:
                layers.append(nn.Flatten())
                in_size = out_channels
                # Add (optional) post-fc-stack after last Conv2D layer.
                # The list holds all hidden sizes plus `num_outputs`, so
                # every index below `len(post_fcnet_hiddens)` is a hidden
                # layer (with activation); only the final logits layer
                # stays linear.
                for i, out_size in enumerate(post_fcnet_hiddens +
                                             [num_outputs]):
                    layers.append(
                        SlimFC(
                            in_size=in_size,
                            out_size=out_size,
                            activation_fn=post_fcnet_activation
                            if i < len(post_fcnet_hiddens) else None,
                            initializer=normc_initializer(1.0)))
                    in_size = out_size
                # Last layer is logits layer.
                self._logits = layers.pop()

            else:
                self._logits = SlimConv2d(
                    out_channels,
                    num_outputs, [1, 1],
                    1,
                    padding,
                    activation_fn=None)

        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())
            self.num_outputs = out_channels

    self._convs = nn.Sequential(*layers)

    # Build the value layers
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        self._value_branch = SlimFC(
            out_channels,
            1,
            initializer=normc_initializer(0.01),
            activation_fn=None)
    else:
        # Separate conv stack for the value function, mirroring the
        # policy conv stack and ending in a single-channel 1x1 conv.
        vf_layers = []
        if self.traj_view_framestacking:
            (w, h) = obs_space.shape
            in_channels = model_config["num_framestacks"]
        else:
            (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel,
                                             [stride, stride])
            vf_layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    padding,
                    activation_fn=activation))
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,
                activation_fn=activation))

        vf_layers.append(
            SlimConv2d(
                in_channels=out_channels,
                out_channels=1,
                kernel=1,
                stride=1,
                padding=None,
                activation_fn=None))
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None

    # Optional: framestacking obs/new_obs for Atari.
    if self.traj_view_framestacking:
        from_ = model_config["num_framestacks"] - 1
        obs_view = self.view_requirements[SampleBatch.OBS]
        obs_view.shift = f"-{from_}:0"
        obs_view.shift_from = -from_
        obs_view.shift_to = 0
        self.view_requirements[SampleBatch.NEXT_OBS] = ViewRequirement(
            data_col=SampleBatch.OBS,
            shift=f"-{from_ - 1}:1",
            space=self.view_requirements[SampleBatch.OBS].space,
        )
def __init__(self,
             obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space,
             num_outputs: Optional[int],
             model_config: ModelConfigDict,
             name: str,
             actor_hidden_activation: str = "relu",
             actor_hiddens: Tuple[int, ...] = (256, 256),
             critic_hidden_activation: str = "relu",
             critic_hiddens: Tuple[int, ...] = (256, 256),
             twin_q: bool = False,
             initial_alpha: float = 1.0,
             target_entropy: Optional[float] = None):
    """Initializes a SACTorchModel instance.

    Args:
        obs_space (gym.spaces.Space): Observation space (forwarded to the
            parent constructor).
        action_space (gym.spaces.Space): Action space; must be a
            Discrete, Box or Simplex space.
        num_outputs (Optional[int]): Forwarded to the parent constructor;
            `self.num_outputs` is then used as the input size of the
            policy- and Q-heads.
        model_config (ModelConfigDict): Model config (forwarded to the
            parent constructor).
        name (str): Model name (forwarded to the parent constructor).
        actor_hidden_activation (str): Activation for the actor network.
        actor_hiddens (list): Hidden layers sizes for the actor network.
        critic_hidden_activation (str): Activation for the critic network.
        critic_hiddens (list): Hidden layers sizes for the critic network.
        twin_q (bool): Build a second (twin) Q network for more stable
            Q-learning.
        initial_alpha (float): The initial value for the to-be-optimized
            alpha parameter (default: 1.0).
        target_entropy (Optional[float]): A target entropy value for
            the to-be-optimized alpha parameter. If None (or "auto"),
            will use the defaults described in the papers for SAC (and
            discrete SAC).

    Note that the core layers for forward() are not defined here, this
    only defines the layers for the output heads. Those layers for
    forward() should be defined in subclasses of SACModel.
    """
    nn.Module.__init__(self)
    super(SACTorchModel, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)

    # `np.prod` replaces `np.product`, which NumPy 2.0 removed.
    if isinstance(action_space, Discrete):
        self.action_dim = action_space.n
        self.discrete = True
        action_outs = q_outs = self.action_dim
        action_ins = None  # No action inputs for the discrete case.
    elif isinstance(action_space, Box):
        self.action_dim = np.prod(action_space.shape)
        self.discrete = False
        # Twice the action dim (two outputs per action component).
        action_outs = 2 * self.action_dim
        action_ins = self.action_dim
        q_outs = 1
    else:
        assert isinstance(action_space, Simplex)
        self.action_dim = np.prod(action_space.shape)
        self.discrete = False
        action_outs = self.action_dim
        action_ins = self.action_dim
        q_outs = 1

    # Build the policy network.
    self.action_model = nn.Sequential()
    ins = self.num_outputs
    self.obs_ins = ins
    activation = get_activation_fn(
        actor_hidden_activation, framework="torch")
    for i, n in enumerate(actor_hiddens):
        self.action_model.add_module(
            f"action_{i}",
            SlimFC(
                ins,
                n,
                initializer=torch.nn.init.xavier_uniform_,
                activation_fn=activation))
        ins = n
    self.action_model.add_module(
        "action_out",
        SlimFC(
            ins,
            action_outs,
            initializer=torch.nn.init.xavier_uniform_,
            activation_fn=None))

    # Build the Q-net(s).
    def build_q_net(name_):
        activation = get_activation_fn(
            critic_hidden_activation, framework="torch")
        # For continuous actions: Feed obs and actions (concatenated)
        # through the NN. For discrete actions, only obs.
        q_net = nn.Sequential()
        ins = self.obs_ins + (0 if self.discrete else action_ins)
        for i, n in enumerate(critic_hiddens):
            q_net.add_module(
                f"{name_}_hidden_{i}",
                SlimFC(
                    ins,
                    n,
                    initializer=torch.nn.init.xavier_uniform_,
                    activation_fn=activation))
            ins = n

        q_net.add_module(
            f"{name_}_out",
            SlimFC(
                ins,
                q_outs,
                initializer=torch.nn.init.xavier_uniform_,
                activation_fn=None))
        return q_net

    self.q_net = build_q_net("q")
    if twin_q:
        self.twin_q_net = build_q_net("twin_q")
    else:
        self.twin_q_net = None

    # Alpha is stored (and optimized) in log-space.
    log_alpha = nn.Parameter(
        torch.from_numpy(np.array([np.log(initial_alpha)])).float())
    self.register_parameter("log_alpha", log_alpha)

    # Auto-calculate the target entropy.
    if target_entropy is None or target_entropy == "auto":
        # See hyperparams in [2] (README.md).
        if self.discrete:
            target_entropy = 0.98 * np.array(
                -np.log(1.0 / action_space.n), dtype=np.float32)
        # See [1] (README.md).
        else:
            target_entropy = -np.prod(action_space.shape)

    self.target_entropy = torch.tensor(
        data=[target_entropy], dtype=torch.float32, requires_grad=False)
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             last_layer_activation):
    """Builds a fully connected Keras model with policy and value outputs.

    Args:
        obs_space: Observation space; the product of its shape is used as
            the (flattened) input size.
        action_space: Passed through to the parent constructor.
        num_outputs: Size of the policy output layer ("fc_out").
        model_config: Config dict; reads the keys "fcnet_activation",
            "fcnet_hiddens", "no_final_linear" and "vf_share_layers".
        name: Passed through to the parent constructor.
        last_layer_activation: Activation for the output layer when
            "no_final_linear" is falsy. If None, that layer is linear.
    """
    super(FullyConnectedNetworkLastLayerActivation, self).__init__(
        obs_space, action_space, num_outputs, model_config, name)

    activation = get_activation_fn(model_config.get("fcnet_activation"))
    if last_layer_activation is not None:
        last_layer_activation = get_activation_fn(last_layer_activation)
    hiddens = model_config.get("fcnet_hiddens")
    no_final_linear = model_config.get("no_final_linear")
    vf_share_layers = model_config.get("vf_share_layers")

    # we are using obs_flat, so take the flattened shape as input
    # (`np.prod` replaces the deprecated alias `np.product`, which was
    # removed in NumPy 2.0).
    inputs = tf.keras.layers.Input(
        shape=(np.prod(obs_space.shape), ), name="observations")
    last_layer = inputs

    if no_final_linear:
        # the last layer is adjusted to be of size num_outputs
        for i, size in enumerate(hiddens[:-1], start=1):
            last_layer = tf.keras.layers.Dense(
                size,
                name=f"fc_{i}",
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=activation,
            kernel_initializer=normc_initializer(1.0))(last_layer)
    else:
        # the last layer is a linear if last_layer_activation is None,
        # else last_layer_activation
        for i, size in enumerate(hiddens, start=1):
            last_layer = tf.keras.layers.Dense(
                size,
                name=f"fc_{i}",
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=last_layer_activation,
            kernel_initializer=normc_initializer(0.01))(last_layer)

    if not vf_share_layers:
        # build a parallel set of hidden layers for the value net
        last_layer = inputs
        for i, size in enumerate(hiddens, start=1):
            last_layer = tf.keras.layers.Dense(
                size,
                name=f"fc_value_{i}",
                activation=activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)

    # The value head is always built: on top of the separate value stack
    # when layers are not shared, otherwise on top of the last policy
    # hidden layer. It must exist in both cases because the model below
    # uses it unconditionally.
    value_out = tf.keras.layers.Dense(
        1,
        name="value_out",
        activation=None,
        kernel_initializer=normc_initializer(0.01))(last_layer)

    self.base_model = tf.keras.Model(inputs, [layer_out, value_out])