def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Build one CNN per image component of a Tuple obs space, plus heads.

    Non-image components contribute by size only: Discrete spaces are
    one-hot encoded later (n slots), 1D Boxes pass through unchanged.
    All per-component outputs are concatenated; if `num_outputs` is
    given, a shared Keras model maps the concat vector to action logits
    and a scalar value.
    """
    # TODO: (sven) Support Dicts as well.
    # NOTE(review): `obs_space.original_space` is accessed unguarded; a
    # space without that attribute raises AttributeError before the
    # assert's message can fire -- confirm callers always preprocess.
    assert isinstance(obs_space.original_space, (Tuple)), \
        "`obs_space.original_space` must be Tuple!"
    super().__init__(obs_space, action_space, num_outputs, model_config,
                     name)

    # One CNN per 3D (image) component; track total concat width.
    self.cnns = {}
    concat_size = 0
    for idx, space in enumerate(obs_space.original_space):
        if len(space.shape) == 3:
            # Image component -> dedicated vision sub-model.
            cnn_config = {
                "conv_filters": model_config.get(
                    "conv_filters", get_filter_config(space.shape)),
                "conv_activation": model_config.get("conv_activation"),
            }
            cnn = ModelCatalog.get_model_v2(
                space,
                action_space,
                num_outputs=None,
                model_config=cnn_config,
                framework="tf",
                name="cnn_{}".format(idx))
            concat_size += cnn.num_outputs
            self.cnns[idx] = cnn
        elif isinstance(space, Discrete):
            # Discrete -> one-hot encoded in forward().
            concat_size += space.n
        else:
            # TODO: (sven) Multidiscrete (see e.g. our auto-LSTM wrappers).
            # Everything else (1D Box).
            assert len(space.shape) == 1, \
                "Only input Box 1D or 3D spaces allowed!"
            concat_size += space.shape[-1]

    self.logits_and_value_model = None
    self._value_out = None
    if num_outputs:
        # Shared concat input feeding separate logits and value heads.
        concat_layer = tf.keras.layers.Input((concat_size, ))
        logits_layer = tf.keras.layers.Dense(
            num_outputs,
            activation=tf.keras.activations.linear,
            name="logits")(concat_layer)
        value_layer = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(concat_layer)
        self.logits_and_value_model = tf.keras.models.Model(
            concat_layer, [logits_layer, value_layer])
    else:
        # No heads requested -> expose the concat width as model output.
        self.num_outputs = concat_size
def __init__(self, obs_space, action_space, num_outputs, model_config, name,
             **kwargs):
    """IMPALA-style torch CNN: conv/BN/ReLU stem, then residual blocks.

    Two parallel conv heads are built on top of the shared trunk: a
    policy ("p") head with `out_channels` channels and a single-channel
    value ("v") head. Their flattened sizes are recorded as
    `num_outputs_p` / `num_outputs_v`.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    trunk = []
    (w, h, in_channels) = obs_space.shape
    self._logits = None
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"
    self.data_format = "channels_last"

    # First filter spec becomes the conv+BN+ReLU stem; every following
    # spec (except the last) becomes a residual block.
    # NOTE(review): the residual blocks ignore the spec's stride value.
    for i, (out_channels, kernel, stride) in enumerate(filters[:-1], 1):
        if i == 1:
            trunk.append(
                nn.Sequential(
                    SlimConv2d(
                        in_channels,
                        out_channels,
                        kernel,
                        stride,
                        padding=1,
                        activation_fn=None),
                    nn.BatchNorm2d(out_channels), nn.ReLU()))
        else:
            trunk.append(ResidualBlock(in_channels, out_channels, kernel))
        in_channels = out_channels

    # Last filter spec feeds the two parallel heads (padding 0).
    out_channels, kernel, stride = filters[-1]
    p_layer = nn.Sequential(
        SlimConv2d(in_channels, out_channels, kernel, stride, 0),
        nn.BatchNorm2d(out_channels), nn.ReLU())
    v_layer = nn.Sequential(
        SlimConv2d(in_channels, 1, kernel, stride, 0), nn.BatchNorm2d(1),
        nn.ReLU())
    self._logits = p_layer
    self._value_branch = v_layer
    self._flat = nn.Flatten()
    # NOTE(review): these sizes assume the trunk and heads preserve the
    # input spatial dims (w, h) -- confirm against the chosen
    # `conv_filters` (the final padding-0 conv shrinks spatial dims for
    # kernels > 1).
    self.num_outputs_p = w * h * out_channels
    self.num_outputs_v = w * h
    self._convs = nn.Sequential(*trunk)
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """TF vision net with separate, flattened policy/value conv heads.

    A shared "same"-padded conv trunk runs over all but the last filter
    spec; the last spec builds two parallel "valid" convs (policy and
    value), each followed by ReLU and Flatten. The Keras base model
    outputs [policy_features, value_features].
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    super(CustomVisionNetwork, self).__init__(obs_space, action_space,
                                              num_outputs, model_config,
                                              name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"

    input_shape = obs_space.shape
    self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Shared conv trunk over all but the last filter spec.
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            padding="same",
            activation=activation,
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)

    # Last filter spec: two parallel "valid" convs -> policy / value.
    out_size, kernel, stride = filters[-1]
    p_layer = tf.keras.layers.Conv2D(
        filters=out_size,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name="conv{}".format(len(filters)))(last_layer)
    p_layer = tf.keras.layers.ReLU()(p_layer)
    v_layer = tf.keras.layers.Conv2D(
        filters=out_size,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name="conv{}".format(len(filters) + 1))(last_layer)
    v_layer = tf.keras.layers.ReLU()(v_layer)

    # Flatten both heads and record their widths.
    p_layer = tf.keras.layers.Flatten(data_format="channels_last")(p_layer)
    v_layer = tf.keras.layers.Flatten(data_format="channels_last")(v_layer)
    self.last_layer_is_flattened = True
    self.num_outputs_p = p_layer.shape[1]
    self.num_outputs_v = v_layer.shape[1]
    self._value_out = v_layer

    self.base_model = tf.keras.Model(inputs, [p_layer, self._value_out])
    self.base_model.summary()
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Torch vision network: conv trunk plus logits/value branches.

    Depending on `no_final_linear` / `num_outputs`, the last conv either
    directly emits `num_outputs` channels, is followed by a (1, 1)-conv
    logits layer, or (num_outputs unknown) is flattened. The value
    branch either shares the trunk (`vf_share_layers`) plus a linear
    head, or is a full parallel conv stack ending in a single-channel
    (1, 1) conv.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None

    conv_layers = []
    (w, h, in_channels) = obs_space.shape
    in_size = [w, h]
    # All but the last filter spec: "same"-padded convs.
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding(in_size, kernel, [stride, stride])
        conv_layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                padding,
                activation_fn=activation))
        in_channels = out_channels
        in_size = out_size

    out_channels, kernel, stride = filters[-1]
    if no_final_linear and num_outputs:
        # Last conv directly produces `num_outputs` channels.
        conv_layers.append(
            SlimConv2d(
                in_channels,
                num_outputs,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))
        out_channels = num_outputs
    else:
        conv_layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))
        if num_outputs:
            # Exact `num_outputs`-sized (1, 1) conv as logits layer.
            # NOTE(review): assumes `kernel` is indexable (a [kw, kh]
            # pair, not a bare int) -- confirm filter-spec format.
            in_size = [
                np.ceil((in_size[0] - kernel[0]) / stride),
                np.ceil((in_size[1] - kernel[1]) / stride)
            ]
            padding, _ = same_padding(in_size, [1, 1], [1, 1])
            self._logits = SlimConv2d(
                out_channels,
                num_outputs,
                [1, 1],
                1,
                padding,
                activation_fn=None)
        else:
            # num_outputs not known -> Flatten; expose channel count.
            self.last_layer_is_flattened = True
            conv_layers.append(nn.Flatten())
            self.num_outputs = out_channels
    self._convs = nn.Sequential(*conv_layers)

    # Value branch: shared trunk + linear head, or parallel conv stack.
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        self._value_branch = SlimFC(
            out_channels,
            1,
            initializer=normc_initializer(0.01),
            activation_fn=None)
    else:
        vf_layers = []
        (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel,
                                             [stride, stride])
            vf_layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    padding,
                    activation_fn=activation))
            in_channels = out_channels
            in_size = out_size
        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,
                activation_fn=activation))
        # Final single-channel (1, 1) conv producing the value.
        vf_layers.append(
            SlimConv2d(
                in_channels=out_channels,
                out_channels=1,
                kernel=1,
                stride=1,
                padding=None,
                activation_fn=None))
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Torch complex-input net: per-component encoders + concat + heads.

    Each component of the flattened Dict/Tuple obs space is encoded:
    3D (image) components get their own CNN, Discrete/MultiDiscrete are
    one-hot encoded, everything else is flattened. The concatenated
    encoding runs through an optional post-FC stack, then (if
    `num_outputs` is truthy) through separate logits and value heads.

    Fix vs. previous revision: the default `conv_filters` for an image
    component is now derived from that component's own shape
    (`component.shape`) instead of `obs_space.shape` -- the composite
    Dict/Tuple space has no meaningful image shape (a Tuple has no
    `.shape` at all, raising AttributeError).
    """
    self.original_space = obs_space.original_space if \
        hasattr(obs_space, "original_space") else obs_space
    assert isinstance(self.original_space, (Dict, Tuple)), \
        "`obs_space.original_space` must be [Dict|Tuple]!"
    self.processed_obs_space = self.original_space if \
        model_config.get("_disable_preprocessor_api") else obs_space
    nn.Module.__init__(self)
    TorchModelV2.__init__(self, self.original_space, action_space,
                          num_outputs, model_config, name)

    self.flattened_input_space = flatten_space(self.original_space)

    # Build the CNN(s) given obs_space's image components.
    self.cnns = {}
    self.one_hot = {}
    self.flatten = {}
    concat_size = 0
    for i, component in enumerate(self.flattened_input_space):
        # Image space.
        if len(component.shape) == 3:
            config = {
                "conv_filters": model_config["conv_filters"]
                if "conv_filters" in model_config else
                get_filter_config(component.shape),
                "conv_activation": model_config.get("conv_activation"),
                "post_fcnet_hiddens": [],
            }
            cnn = ModelCatalog.get_model_v2(
                component,
                action_space,
                num_outputs=None,
                model_config=config,
                framework="torch",
                name="cnn_{}".format(i))
            concat_size += cnn.num_outputs
            self.cnns[i] = cnn
            # Register so the sub-model's params are picked up.
            self.add_module("cnn_{}".format(i), cnn)
        # Discrete|MultiDiscrete inputs -> One-hot encode.
        elif isinstance(component, Discrete):
            self.one_hot[i] = True
            concat_size += component.n
        elif isinstance(component, MultiDiscrete):
            self.one_hot[i] = True
            concat_size += sum(component.nvec)
        # Everything else (1D Box) -> flatten.
        else:
            self.flatten[i] = int(np.product(component.shape))
            concat_size += self.flatten[i]

    # Optional post-concat FC-stack.
    post_fc_stack_config = {
        "fcnet_hiddens": model_config.get("post_fcnet_hiddens", []),
        "fcnet_activation": model_config.get("post_fcnet_activation",
                                             "relu")
    }
    self.post_fc_stack = ModelCatalog.get_model_v2(
        Box(float("-inf"),
            float("inf"),
            shape=(concat_size, ),
            dtype=np.float32),
        self.action_space,
        None,
        post_fc_stack_config,
        framework="torch",
        name="post_fc_stack")

    # Actions and value heads.
    self.logits_layer = None
    self.value_layer = None
    self._value_out = None
    if num_outputs:
        # Action-distribution head.
        self.logits_layer = SlimFC(
            in_size=self.post_fc_stack.num_outputs,
            out_size=num_outputs,
            activation_fn=None,
        )
        # Create the value branch model.
        self.value_layer = SlimFC(
            in_size=self.post_fc_stack.num_outputs,
            out_size=1,
            activation_fn=None,
            initializer=torch_normc_initializer(0.01))
    else:
        self.num_outputs = concat_size
def __init__(
        self,
        input_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        num_outputs: Optional[int] = None,
        *,
        name: str = "",
        conv_filters: Optional[Sequence[Sequence[int]]] = None,
        conv_activation: Optional[str] = None,
        post_fcnet_hiddens: Optional[Sequence[int]] = (),
        post_fcnet_activation: Optional[str] = None,
        no_final_linear: bool = False,
        vf_share_layers: bool = False,
        free_log_std: bool = False,
        **kwargs,
):
    """Keras-native vision network producing [logits_out, value_out].

    Builds a conv trunk from `conv_filters`, then -- depending on
    `num_outputs`, `no_final_linear` and `post_fcnet_hiddens` -- either
    a (1, 1)-conv / Dense logits head or a flattened output, and a
    value head that shares the trunk (`vf_share_layers`) or is a
    parallel conv stack. `free_log_std` is accepted for interface
    compatibility but unused here -- TODO confirm intended.

    Fixes vs. previous revision:
    - The shape-mismatch ValueError referenced `self.model_config`,
      which this keyword-arg-based class never defines (it would have
      raised AttributeError instead of the intended message); it now
      reports the local `conv_filters`.
    - `post_fcnet_hiddens` (default `()`) is normalized to a list so
      tuple inputs no longer raise TypeError in the list
      concatenations below.
    """
    super().__init__(name=name)

    if not conv_filters:
        conv_filters = get_filter_config(input_space.shape)
    assert len(conv_filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"
    # Normalize to list; we concatenate with lists below.
    post_fcnet_hiddens = list(post_fcnet_hiddens or [])

    conv_activation = get_activation_fn(conv_activation, framework="tf")
    post_fcnet_activation = get_activation_fn(
        post_fcnet_activation, framework="tf")

    input_shape = input_space.shape
    self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers ("same"-padded trunk).
    for i, (out_size, kernel, stride) in enumerate(conv_filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=stride if isinstance(stride, (list, tuple)) else
            (stride, stride),
            activation=conv_activation,
            padding="same",
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)

    out_size, kernel, stride = conv_filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        last_layer = tf.keras.layers.Conv2D(
            out_size if post_fcnet_hiddens else num_outputs,
            kernel,
            strides=stride if isinstance(stride, (list, tuple)) else
            (stride, stride),
            activation=conv_activation,
            padding="valid",
            data_format="channels_last",
            name="conv_out")(last_layer)
        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else [])
        for i, out_size in enumerate(layer_sizes):
            last_layer = tf.keras.layers.Dense(
                out_size,
                name="post_fcnet_{}".format(i),
                activation=post_fcnet_activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=stride if isinstance(stride, (list, tuple)) else
            (stride, stride),
            activation=conv_activation,
            padding="valid",
            data_format="channels_last",
            name="conv{}".format(len(conv_filters)))(last_layer)

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            if post_fcnet_hiddens:
                last_cnn = last_layer = tf.keras.layers.Conv2D(
                    post_fcnet_hiddens[0], [1, 1],
                    activation=post_fcnet_activation,
                    padding="same",
                    data_format="channels_last",
                    name="conv_out")(last_layer)
                # Add (optional) post-fc-stack after last Conv2D layer.
                for i, out_size in enumerate(post_fcnet_hiddens[1:] +
                                             [num_outputs]):
                    last_layer = tf.keras.layers.Dense(
                        out_size,
                        name="post_fcnet_{}".format(i + 1),
                        activation=post_fcnet_activation
                        if i < len(post_fcnet_hiddens) - 1 else None,
                        kernel_initializer=normc_initializer(1.0))(
                            last_layer)
            else:
                last_cnn = last_layer = tf.keras.layers.Conv2D(
                    num_outputs, [1, 1],
                    activation=None,
                    padding="same",
                    data_format="channels_last",
                    name="conv_out")(last_layer)

            if last_cnn.shape[1] != 1 or last_cnn.shape[2] != 1:
                # FIX: report the local `conv_filters` (this class has
                # no `self.model_config`).
                raise ValueError(
                    "Given `conv_filters` ({}) do not result in a [B, 1, "
                    "1, {} (`num_outputs`)] shape (but in {})! Please "
                    "adjust your Conv2D stack such that the dims 1 and 2 "
                    "are both 1.".format(conv_filters, num_outputs,
                                         list(last_cnn.shape)))
        # num_outputs not known -> Flatten.
        else:
            self.last_layer_is_flattened = True
            last_layer = tf.keras.layers.Flatten(
                data_format="channels_last")(last_layer)
            # Add (optional) post-fc-stack after last Conv2D layer.
            for i, out_size in enumerate(post_fcnet_hiddens):
                last_layer = tf.keras.layers.Dense(
                    out_size,
                    name="post_fcnet_{}".format(i),
                    activation=post_fcnet_activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)
    logits_out = last_layer

    # Build the value layers.
    if vf_share_layers:
        if not self.last_layer_is_flattened:
            # Squeeze the (1, 1) spatial dims of the shared conv output.
            last_layer = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)
    else:
        # Build a parallel set of hidden layers for the value net.
        last_layer = inputs
        for i, (out_size, kernel, stride) in enumerate(
                conv_filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=stride if isinstance(stride, (list, tuple)) else
                (stride, stride),
                activation=conv_activation,
                padding="same",
                data_format="channels_last",
                name="conv_value_{}".format(i))(last_layer)
        out_size, kernel, stride = conv_filters[-1]
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=stride if isinstance(stride, (list, tuple)) else
            (stride, stride),
            activation=conv_activation,
            padding="valid",
            data_format="channels_last",
            name="conv_value_{}".format(len(conv_filters)))(last_layer)
        last_layer = tf.keras.layers.Conv2D(
            1, [1, 1],
            activation=None,
            padding="same",
            data_format="channels_last",
            name="conv_value_out")(last_layer)
        value_out = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

    self.base_model = tf.keras.Model(inputs, [logits_out, value_out])
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """TF complex-input net: per-component encoders + concat + heads.

    3D (image) components of the Tuple obs space get their own CNN,
    Discrete components are one-hot encoded, everything else is
    flattened. The concat runs through an optional post-FC stack and
    (if `num_outputs` is truthy) a shared Keras logits/value model.

    Fix vs. previous revision: the default `conv_filters` for an image
    component is derived from that component's own shape
    (`component.shape`) instead of the composite `obs_space.shape`,
    which is not the image's shape (and may not exist on a Tuple).
    """
    # TODO: (sven) Support Dicts as well.
    self.original_space = obs_space.original_space if \
        hasattr(obs_space, "original_space") else obs_space
    assert isinstance(self.original_space, (Tuple)), \
        "`obs_space.original_space` must be Tuple!"
    super().__init__(self.original_space, action_space, num_outputs,
                     model_config, name)

    # Build the CNN(s) given obs_space's image components.
    self.cnns = {}
    self.one_hot = {}
    self.flatten = {}
    concat_size = 0
    for i, component in enumerate(self.original_space):
        # Image space.
        if len(component.shape) == 3:
            config = {
                "conv_filters": model_config["conv_filters"]
                if "conv_filters" in model_config else
                get_filter_config(component.shape),
                "conv_activation": model_config.get("conv_activation"),
                "post_fcnet_hiddens": [],
            }
            cnn = ModelCatalog.get_model_v2(
                component,
                action_space,
                num_outputs=None,
                model_config=config,
                framework="tf",
                name="cnn_{}".format(i))
            concat_size += cnn.num_outputs
            self.cnns[i] = cnn
        # Discrete inputs -> One-hot encode.
        elif isinstance(component, Discrete):
            self.one_hot[i] = True
            concat_size += component.n
        # TODO: (sven) Multidiscrete (see e.g. our auto-LSTM wrappers).
        # Everything else (1D Box) -> flatten.
        else:
            self.flatten[i] = int(np.product(component.shape))
            concat_size += self.flatten[i]

    # Optional post-concat FC-stack.
    post_fc_stack_config = {
        "fcnet_hiddens": model_config.get("post_fcnet_hiddens", []),
        "fcnet_activation": model_config.get("post_fcnet_activation",
                                             "relu")
    }
    self.post_fc_stack = ModelCatalog.get_model_v2(
        Box(float("-inf"),
            float("inf"),
            shape=(concat_size, ),
            dtype=np.float32),
        self.action_space,
        None,
        post_fc_stack_config,
        framework="tf",
        name="post_fc_stack")

    # Actions and value heads.
    self.logits_and_value_model = None
    self._value_out = None
    if num_outputs:
        # Action-distribution head.
        concat_layer = tf.keras.layers.Input(
            (self.post_fc_stack.num_outputs, ))
        logits_layer = tf.keras.layers.Dense(
            num_outputs,
            activation=tf.keras.activations.linear,
            name="logits")(concat_layer)
        # Create the value branch model.
        value_layer = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(concat_layer)
        self.logits_and_value_model = tf.keras.models.Model(
            concat_layer, [logits_layer, value_layer])
    else:
        self.num_outputs = self.post_fc_stack.num_outputs
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """TF vision network (tf1-era variant; registers variables).

    Builds a conv trunk, a logits output (direct conv, (1, 1) conv, or
    Flatten when `num_outputs` is unknown) and a value branch that
    either shares the trunk or is a parallel conv stack.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    super(VisionNetwork, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0,\
        "Must provide at least 1 entry in `conv_filters`!"
    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    inputs = tf.keras.layers.Input(
        shape=obs_space.shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Conv trunk over all but the last filter spec ("same" padding).
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="same",
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)

    out_size, kernel, stride = filters[-1]
    if no_final_linear and num_outputs:
        # Last conv layer directly emits `num_outputs` channels.
        last_layer = tf.keras.layers.Conv2D(
            num_outputs,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_out")(last_layer)
        conv_out = last_layer
    else:
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv{}".format(len(filters)))(last_layer)
        if num_outputs:
            # Exact `num_outputs`-sized (1, 1) conv as logits output.
            conv_out = tf.keras.layers.Conv2D(
                num_outputs, [1, 1],
                activation=None,
                padding="same",
                data_format="channels_last",
                name="conv_out")(last_layer)
            if conv_out.shape[1] != 1 or conv_out.shape[2] != 1:
                raise ValueError(
                    "Given `conv_filters` ({}) do not result in a [B, 1, "
                    "1, {} (`num_outputs`)] shape (but in {})! Please "
                    "adjust your Conv2D stack such that the dims 1 and 2 "
                    "are both 1.".format(self.model_config["conv_filters"],
                                         self.num_outputs,
                                         list(conv_out.shape)))
        else:
            # num_outputs unknown -> Flatten and expose resulting size.
            self.last_layer_is_flattened = True
            conv_out = tf.keras.layers.Flatten(
                data_format="channels_last")(last_layer)
            self.num_outputs = conv_out.shape[1]

    # Value branch: shared trunk + Dense, or a parallel conv stack.
    if vf_share_layers:
        last_layer = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)
    else:
        # Build a parallel set of hidden layers for the value net.
        last_layer = inputs
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                activation=activation,
                padding="same",
                data_format="channels_last",
                name="conv_value_{}".format(i))(last_layer)
        out_size, kernel, stride = filters[-1]
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=(stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_value_{}".format(len(filters)))(last_layer)
        last_layer = tf.keras.layers.Conv2D(
            1, [1, 1],
            activation=None,
            padding="same",
            data_format="channels_last",
            name="conv_value_out")(last_layer)
        value_out = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

    self.base_model = tf.keras.Model(inputs, [conv_out, value_out])
    self.register_variables(self.base_model.variables)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Torch complex-input net with BatchNorm CNNs and split p/v paths.

    Every component of the Tuple obs space EXCEPT the last one is
    encoded (image -> TorchBatchNormModel CNN with separate policy and
    value widths; Discrete -> one-hot; else -> flatten), then the
    policy and value concats each run through their own FC+BatchNorm
    stack before the logits/value heads.

    Fixes vs. previous revision:
    - Default `conv_filters` now come from the image `component.shape`
      (the composite `obs_space` has no meaningful image shape).
    - Removed a stray debug `print(cnn)`.
    - `post_fcnet_hiddens` is validated up-front (a clear assert
      instead of a bare IndexError on `hidden_size[0]`).
    """
    self.original_space = obs_space.original_space if \
        hasattr(obs_space, "original_space") else obs_space
    assert isinstance(self.original_space, (Tuple)), \
        "`obs_space.original_space` must be Tuple!"
    nn.Module.__init__(self)
    TorchModelV2.__init__(self, self.original_space, action_space,
                          num_outputs, model_config, name)
    self.new_obs_space = obs_space

    # Build the CNN(s) given obs_space's image components.
    self.cnns = {}
    self.one_hot = {}
    self.flatten = {}
    concat_size_p, concat_size_v = 0, 0
    # NOTE: the last Tuple component is deliberately skipped here --
    # presumably consumed elsewhere; TODO confirm against forward().
    for i, component in enumerate(self.original_space[:-1]):
        # Image space.
        if len(component.shape) == 3:
            config = {
                "conv_filters": model_config["conv_filters"]
                if "conv_filters" in model_config else
                get_filter_config(component.shape),
                "conv_activation": model_config.get("conv_activation"),
                "post_fcnet_hiddens": [],
            }
            cnn = TorchBatchNormModel(component, action_space, None,
                                      config, 'cnn_{}'.format(i))
            concat_size_p += cnn.num_outputs_p
            concat_size_v += cnn.num_outputs_v
            self.cnns[i] = cnn
            self.add_module("cnn_{}".format(i), cnn)
        # Discrete inputs -> One-hot encode.
        elif isinstance(component, Discrete):
            self.one_hot[i] = True
            concat_size_p += component.n
            concat_size_v += component.n
        # Everything else (1D Box) -> flatten.
        else:
            self.flatten[i] = int(np.product(component.shape))
            concat_size_p += self.flatten[i]
            concat_size_v += self.flatten[i]

    hidden_size = model_config.get("post_fcnet_hiddens", [])
    # Fail fast with a clear message (hidden_size[0] is required below).
    assert len(hidden_size) > 0, \
        "Must provide at least 1 entry in `post_fcnet_hiddens`!"
    # Separate post-concat FC+BatchNorm stacks for policy and value.
    self.post_fc_stack = nn.Sequential(
        SlimFC(concat_size_p,
               hidden_size[0],
               initializer=torch_normc_initializer(1.0),
               activation_fn=None), nn.BatchNorm1d(hidden_size[0]),
        nn.ReLU())
    self.post_fc_stack_vf = nn.Sequential(
        SlimFC(concat_size_v,
               hidden_size[0],
               initializer=torch_normc_initializer(1.0),
               activation_fn=None), nn.BatchNorm1d(hidden_size[0]),
        nn.ReLU())

    # Actions and value heads.
    self.logits_layer = None
    self.value_layer = None
    self._value_out = None
    if num_outputs:
        # Action-distribution head.
        self.logits_layer = SlimFC(
            in_size=hidden_size[0],
            out_size=num_outputs,
            initializer=torch_normc_initializer(0.01),
            activation_fn=None,
        )
        # Value head (tanh-squashed output).
        self.value_layer = SlimFC(
            in_size=hidden_size[0],
            out_size=1,
            initializer=torch_normc_initializer(1.0),
            activation_fn='tanh',
        )
    else:
        # This variant requires explicit num_outputs.
        raise NotImplementedError()
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """TF vision network with optional trajectory-view framestacking.

    Like the standard vision net (conv trunk + logits/value heads,
    optional post-FC stack), but when `num_framestacks` > 1 the input
    gains a stacking dim and the model registers shifted OBS/NEXT_OBS
    view requirements so framestacking happens via the traj. view API.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    super(VisionNetwork, self).__init__(obs_space, action_space,
                                        num_outputs, model_config, name)

    activation = get_activation_fn(
        self.model_config.get("conv_activation"), framework="tf")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0,\
        "Must provide at least 1 entry in `conv_filters`!"

    # Post FC net config.
    post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="tf")

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")
    self.traj_view_framestacking = False

    # Perform Atari framestacking via traj. view API.
    if model_config.get("num_framestacks") != "auto" and \
            model_config.get("num_framestacks", 0) > 1:
        input_shape = obs_space.shape + (model_config["num_framestacks"], )
        self.data_format = "channels_first"
        self.traj_view_framestacking = True
    else:
        input_shape = obs_space.shape
        self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers ("same"-padded conv trunk).
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=stride if isinstance(stride, (list, tuple)) else
            (stride, stride),
            activation=activation,
            padding="same",
            data_format="channels_last",
            name="conv{}".format(i))(last_layer)

    out_size, kernel, stride = filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        last_layer = tf.keras.layers.Conv2D(
            out_size if post_fcnet_hiddens else num_outputs,
            kernel,
            strides=stride if isinstance(stride, (list, tuple)) else
            (stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_out")(last_layer)
        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else [])
        for i, out_size in enumerate(layer_sizes):
            last_layer = tf.keras.layers.Dense(
                out_size,
                name="post_fcnet_{}".format(i),
                activation=post_fcnet_activation,
                kernel_initializer=normc_initializer(1.0))(last_layer)
    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=stride if isinstance(stride, (list, tuple)) else
            (stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv{}".format(len(filters)))(last_layer)

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            if post_fcnet_hiddens:
                last_cnn = last_layer = tf.keras.layers.Conv2D(
                    post_fcnet_hiddens[0], [1, 1],
                    activation=post_fcnet_activation,
                    padding="same",
                    data_format="channels_last",
                    name="conv_out")(last_layer)
                # Add (optional) post-fc-stack after last Conv2D layer.
                for i, out_size in enumerate(post_fcnet_hiddens[1:] +
                                             [num_outputs]):
                    last_layer = tf.keras.layers.Dense(
                        out_size,
                        name="post_fcnet_{}".format(i + 1),
                        activation=post_fcnet_activation
                        if i < len(post_fcnet_hiddens) - 1 else None,
                        kernel_initializer=normc_initializer(1.0))(
                            last_layer)
            else:
                last_cnn = last_layer = tf.keras.layers.Conv2D(
                    num_outputs, [1, 1],
                    activation=None,
                    padding="same",
                    data_format="channels_last",
                    name="conv_out")(last_layer)

            if last_cnn.shape[1] != 1 or last_cnn.shape[2] != 1:
                raise ValueError(
                    "Given `conv_filters` ({}) do not result in a [B, 1, "
                    "1, {} (`num_outputs`)] shape (but in {})! Please "
                    "adjust your Conv2D stack such that the dims 1 and 2 "
                    "are both 1.".format(self.model_config["conv_filters"],
                                         self.num_outputs,
                                         list(last_cnn.shape)))
        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            last_layer = tf.keras.layers.Flatten(
                data_format="channels_last")(last_layer)
            # Add (optional) post-fc-stack after last Conv2D layer.
            for i, out_size in enumerate(post_fcnet_hiddens):
                last_layer = tf.keras.layers.Dense(
                    out_size,
                    name="post_fcnet_{}".format(i),
                    activation=post_fcnet_activation,
                    kernel_initializer=normc_initializer(1.0))(last_layer)
            self.num_outputs = last_layer.shape[1]
    logits_out = last_layer

    # Build the value layers.
    if vf_share_layers:
        if not self.last_layer_is_flattened:
            # Squeeze the (1, 1) spatial dims of the shared conv output.
            last_layer = tf.keras.layers.Lambda(
                lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01))(last_layer)
    else:
        # Build a parallel set of hidden layers for the value net.
        last_layer = inputs
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=stride if isinstance(stride, (list, tuple)) else
                (stride, stride),
                activation=activation,
                padding="same",
                data_format="channels_last",
                name="conv_value_{}".format(i))(last_layer)
        out_size, kernel, stride = filters[-1]
        last_layer = tf.keras.layers.Conv2D(
            out_size,
            kernel,
            strides=stride if isinstance(stride, (list, tuple)) else
            (stride, stride),
            activation=activation,
            padding="valid",
            data_format="channels_last",
            name="conv_value_{}".format(len(filters)))(last_layer)
        last_layer = tf.keras.layers.Conv2D(
            1, [1, 1],
            activation=None,
            padding="same",
            data_format="channels_last",
            name="conv_value_out")(last_layer)
        value_out = tf.keras.layers.Lambda(
            lambda x: tf.squeeze(x, axis=[1, 2]))(last_layer)

    self.base_model = tf.keras.Model(inputs, [logits_out, value_out])

    # Optional: framestacking obs/new_obs for Atari.
    if self.traj_view_framestacking:
        from_ = model_config["num_framestacks"] - 1
        self.view_requirements[SampleBatch.OBS].shift = \
            "-{}:0".format(from_)
        self.view_requirements[SampleBatch.OBS].shift_from = -from_
        self.view_requirements[SampleBatch.OBS].shift_to = 0
        self.view_requirements[SampleBatch.NEXT_OBS] = ViewRequirement(
            data_col=SampleBatch.OBS,
            shift="-{}:1".format(from_ - 1),
            space=self.view_requirements[SampleBatch.OBS].space,
            used_for_compute_actions=False,
        )
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Torch complex-input network over a Tuple observation space.

    Builds one sub-network per component of `obs_space.original_space`:
    a CNN for image (3D) components, a one-hot size contribution for
    Discrete components, and a pass-through size contribution for 1D
    Boxes. The per-component outputs are concatenated; when
    `num_outputs` is given, a logits head and a value head are built on
    top of the concatenation, otherwise `self.num_outputs` is set to the
    concatenated size.
    """
    # TODO: (sven) Support Dicts as well.
    assert isinstance(obs_space.original_space, (Tuple)), \
        "`obs_space.original_space` must be Tuple!"

    nn.Module.__init__(self)
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)

    # Atari type CNNs or IMPALA type CNNs (with residual layers)?
    self.cnn_type = self.model_config["custom_model_config"].get(
        "conv_type", "atari")

    # Build the CNN(s) given obs_space's image components.
    self.cnns = {}
    concat_size = 0
    for i, component in enumerate(obs_space.original_space):
        # Image space (rank-3 shape, presumably (w, h, c) -- the CNN
        # sub-model handles the layout).
        if len(component.shape) == 3:
            config = {
                "conv_filters": model_config.get(
                    "conv_filters", get_filter_config(component.shape)),
                "conv_activation": model_config.get("conv_activation"),
            }
            if self.cnn_type == "atari":
                cnn = ModelCatalog.get_model_v2(component,
                                                action_space,
                                                num_outputs=None,
                                                model_config=config,
                                                framework="torch",
                                                name="cnn_{}".format(i))
            else:
                cnn = TorchImpalaVisionNet(component,
                                           action_space,
                                           num_outputs=None,
                                           model_config=config,
                                           name="cnn_{}".format(i))
            concat_size += cnn.num_outputs
            self.cnns[i] = cnn
            # Register explicitly so the sub-model's parameters are
            # tracked by this nn.Module (dict storage alone would not).
            self.add_module("cnn_{}".format(i), cnn)
        # Discrete inputs -> One-hot encode.
        elif isinstance(component, Discrete):
            concat_size += component.n
        # TODO: (sven) Multidiscrete (see e.g. our auto-LSTM wrappers).
        # Everything else (1D Box).
        else:
            assert len(component.shape) == 1, \
                "Only input Box 1D or 3D spaces allowed!"
            concat_size += component.shape[-1]

    self.logits_layer = None
    self.value_layer = None
    # Cached value-head output; presumably filled during forward passes
    # elsewhere in this class.
    self._value_out = None

    if num_outputs:
        # Action-distribution head (no activation -> raw logits).
        self.logits_layer = SlimFC(
            in_size=concat_size,
            out_size=num_outputs,
            activation_fn=None,
        )
        # Create the value branch model.
        self.value_layer = SlimFC(
            in_size=concat_size,
            out_size=1,
            activation_fn=None,
            initializer=torch_normc_initializer(0.01))
    else:
        # No heads requested: expose the concat size as this model's
        # output size.
        self.num_outputs = concat_size
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """TF complex-input network over an arbitrary (flattened) obs space.

    Flattens `obs_space` into its leaf components and builds one
    sub-model per component: a CNN for image (3D) components, an FC
    stack over a one-hot encoding for Discrete/MultiDiscrete, and an FC
    stack over the flattened values for everything else (1D Box). All
    component outputs are concatenated, passed through an optional
    post-concat FC stack, and (when `num_outputs` is given) through
    logits/value heads.
    """
    self.original_space = (obs_space.original_space if hasattr(
        obs_space, "original_space") else obs_space)
    self.processed_obs_space = (
        self.original_space
        if model_config.get("_disable_preprocessor_api") else obs_space)
    super().__init__(self.original_space, action_space, num_outputs,
                     model_config, name)

    self.flattened_input_space = flatten_space(self.original_space)

    # Build the CNN(s) given obs_space's image components.
    self.cnns = {}
    self.one_hot = {}
    self.flatten_dims = {}
    self.flatten = {}
    concat_size = 0
    for i, component in enumerate(self.flattened_input_space):
        # Image space.
        if len(component.shape) == 3:
            config = {
                "conv_filters": model_config["conv_filters"]
                if "conv_filters" in model_config else
                get_filter_config(component.shape),
                "conv_activation": model_config.get("conv_activation"),
                "post_fcnet_hiddens": [],
            }
            self.cnns[i] = ModelCatalog.get_model_v2(
                component,
                action_space,
                num_outputs=None,
                model_config=config,
                framework="tf",
                name="cnn_{}".format(i),
            )
            concat_size += self.cnns[i].num_outputs
        # Discrete|MultiDiscrete inputs -> One-hot encode.
        elif isinstance(component, (Discrete, MultiDiscrete)):
            if isinstance(component, Discrete):
                size = component.n
            else:
                # MultiDiscrete: one-hot width is the sum of all
                # sub-space cardinalities.
                size = sum(component.nvec)
            config = {
                "fcnet_hiddens": model_config["fcnet_hiddens"],
                "fcnet_activation": model_config.get("fcnet_activation"),
                "post_fcnet_hiddens": [],
            }
            # The FC sub-model consumes the one-hot vector; the Box here
            # only conveys the input size to the catalog.
            self.one_hot[i] = ModelCatalog.get_model_v2(
                Box(-1.0, 1.0, (size, ), np.float32),
                action_space,
                num_outputs=None,
                model_config=config,
                framework="tf",
                name="one_hot_{}".format(i),
            )
            concat_size += self.one_hot[i].num_outputs
        # Everything else (1D Box).
        else:
            size = int(np.product(component.shape))
            config = {
                "fcnet_hiddens": model_config["fcnet_hiddens"],
                "fcnet_activation": model_config.get("fcnet_activation"),
                "post_fcnet_hiddens": [],
            }
            self.flatten[i] = ModelCatalog.get_model_v2(
                Box(-1.0, 1.0, (size, ), np.float32),
                action_space,
                num_outputs=None,
                model_config=config,
                framework="tf",
                name="flatten_{}".format(i),
            )
            self.flatten_dims[i] = size
            concat_size += self.flatten[i].num_outputs

    # Optional post-concat FC-stack.
    post_fc_stack_config = {
        "fcnet_hiddens": model_config.get("post_fcnet_hiddens", []),
        "fcnet_activation": model_config.get("post_fcnet_activation",
                                             "relu"),
    }
    self.post_fc_stack = ModelCatalog.get_model_v2(
        Box(float("-inf"),
            float("inf"),
            shape=(concat_size, ),
            dtype=np.float32),
        self.action_space,
        None,
        post_fc_stack_config,
        framework="tf",
        name="post_fc_stack",
    )

    # Actions and value heads.
    self.logits_and_value_model = None
    self._value_out = None
    if num_outputs:
        # Action-distribution head.
        concat_layer = tf.keras.layers.Input(
            (self.post_fc_stack.num_outputs, ))
        logits_layer = tf.keras.layers.Dense(
            num_outputs,
            activation=None,
            kernel_initializer=normc_initializer(0.01),
            name="logits",
        )(concat_layer)

        # Create the value branch model.
        value_layer = tf.keras.layers.Dense(
            1,
            activation=None,
            kernel_initializer=normc_initializer(0.01),
            name="value_out",
        )(concat_layer)
        # Single Keras model producing both heads from the concat input.
        self.logits_and_value_model = tf.keras.models.Model(
            concat_layer, [logits_layer, value_layer])
    else:
        # No heads requested: expose the post-FC-stack size as output.
        self.num_outputs = self.post_fc_stack.num_outputs
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """IMPALA-style residual vision network (tf/Keras).

    Builds a shared conv torso (first filter: plain conv; every further
    non-final filter: a two-conv residual block) followed by two "valid"
    conv heads: a policy head (flattened into `num_outputs_p` units) and
    a single-channel value head (flattened into `num_outputs_v` units).
    Both heads are flattened, so `last_layer_is_flattened` is True.

    Cleanup vs. previous revision: removed large commented-out dead code
    (BatchNorm/`is_training` experiments, two no-op triple-quoted blocks)
    and unused locals (`activation`, `post_fcnet_*`, `no_final_linear`,
    `vf_share_layers`, `logits_out`). Live graph is unchanged.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    super(CustomVisionNetwork, self).__init__(obs_space, action_space,
                                              num_outputs, model_config,
                                              name)

    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"

    self.traj_view_framestacking = False

    # Perform Atari framestacking via traj. view API.
    if model_config.get("num_framestacks") != "auto" and \
            model_config.get("num_framestacks", 0) > 1:
        input_shape = obs_space.shape + (model_config["num_framestacks"], )
        self.data_format = "channels_first"
        self.traj_view_framestacking = True
    else:
        input_shape = obs_space.shape
        self.data_format = "channels_last"

    inputs = tf.keras.layers.Input(shape=input_shape, name="observations")
    last_layer = inputs
    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False

    # Build the action layers (residual torso). Layer names keep the
    # historical numbering scheme so checkpoints stay loadable.
    for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        if i == 1:
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i))(last_layer)
            last_layer = tf.keras.layers.ReLU()(last_layer)
        else:
            input_layer = last_layer
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i * 2 - 2))(last_layer)
            last_layer = tf.keras.layers.ReLU()(last_layer)
            last_layer = tf.keras.layers.Conv2D(
                out_size,
                kernel,
                strides=(stride, stride),
                padding="same",
                data_format="channels_last",
                name="conv{}".format(i * 2 - 1))(last_layer)
            # Residual add. NOTE(review): assumes matching spatial dims /
            # channels between `input_layer` and the block output (i.e.
            # stride 1, constant channel count) -- confirm vs. the
            # `conv_filters` configs actually used.
            last_layer = tf.keras.layers.Add()([input_layer, last_layer])
            last_layer = tf.keras.layers.ReLU()(last_layer)

    out_size, kernel, stride = filters[-1]

    # Policy head: "valid" conv with `out_size` channels.
    p_layer = tf.keras.layers.Conv2D(filters=out_size,
                                     kernel_size=kernel,
                                     strides=(stride, stride),
                                     padding="valid",
                                     data_format="channels_last",
                                     name="conv{}".format(
                                         2 * len(filters)))(last_layer)
    p_layer = tf.keras.layers.ReLU()(p_layer)

    # Value head: single-channel "valid" conv off the same torso.
    v_layer = tf.keras.layers.Conv2D(
        filters=1,
        kernel_size=kernel,
        strides=(stride, stride),
        padding="valid",
        data_format="channels_last",
        name="conv{}".format(2 * len(filters) + 1))(last_layer)
    v_layer = tf.keras.layers.ReLU()(v_layer)

    p_layer = tf.keras.layers.Flatten(data_format="channels_last")(p_layer)
    v_layer = tf.keras.layers.Flatten(data_format="channels_last")(v_layer)
    self.last_layer_is_flattened = True

    # Flat head sizes, read by callers to size distributions/losses.
    self.num_outputs_p = p_layer.shape[1]
    self.num_outputs_v = v_layer.shape[1]
    self._value_out = v_layer

    self.base_model = tf.keras.Model(inputs, [p_layer, self._value_out])
    self.base_model.summary()
def __init__(
    self,
    obs_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    num_outputs: int,
    model_config: ModelConfigDict,
    name: str,
):
    """Torch vision network.

    Builds a stack of Conv2d layers from `conv_filters`; the final layer
    is either activation-ended (`no_final_linear`), a (1,1)-Conv2d /
    post-FC logits head sized `num_outputs`, or a Flatten whose width is
    discovered via a dummy forward pass and stored in `self.num_outputs`.
    The value branch either shares the conv torso (`vf_share_layers`) or
    duplicates it as a separate conv stack ending in one output channel.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    TorchModelV2.__init__(
        self, obs_space, action_space, num_outputs, model_config, name
    )
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, "Must provide at least 1 entry in `conv_filters`!"

    # Post FC net config.
    post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="torch"
    )

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None

    layers = []
    # NOTE(review): assumes channels-last (w, h, c) image observations;
    # the dummy pass below permutes to channels-first -- confirm.
    (w, h, in_channels) = obs_space.shape

    in_size = [w, h]
    # All but the last filter spec: "same"-padded convs; `same_padding`
    # also tracks the evolving spatial size.
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding(in_size, kernel, stride)
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                padding,
                activation_fn=activation,
            )
        )
        in_channels = out_channels
        in_size = out_size

    out_channels, kernel, stride = filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        out_channels = out_channels if post_fcnet_hiddens else num_outputs
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation,
            )
        )

        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else []
        )
        for i, out_size in enumerate(layer_sizes):
            layers.append(
                SlimFC(
                    in_size=out_channels,
                    out_size=out_size,
                    activation_fn=post_fcnet_activation,
                    initializer=normc_initializer(1.0),
                )
            )
            out_channels = out_size

    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation,
            )
        )

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            # Spatial size after the final "valid" conv.
            in_size = [
                np.ceil((in_size[0] - kernel[0]) / stride),
                np.ceil((in_size[1] - kernel[1]) / stride),
            ]
            padding, _ = same_padding(in_size, [1, 1], [1, 1])
            if post_fcnet_hiddens:
                layers.append(nn.Flatten())
                in_size = out_channels
                # Add (optional) post-fc-stack after last Conv2D layer.
                for i, out_size in enumerate(post_fcnet_hiddens + [num_outputs]):
                    layers.append(
                        SlimFC(
                            in_size=in_size,
                            out_size=out_size,
                            activation_fn=post_fcnet_activation
                            if i < len(post_fcnet_hiddens) - 1
                            else None,
                            initializer=normc_initializer(1.0),
                        )
                    )
                    in_size = out_size
                # Last layer is logits layer.
                self._logits = layers.pop()

            else:
                self._logits = SlimConv2d(
                    out_channels,
                    num_outputs,
                    [1, 1],
                    1,
                    padding,
                    activation_fn=None,
                )

        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())

    self._convs = nn.Sequential(*layers)

    # If our num_outputs still unknown, we need to do a test pass to
    # figure out the output dimensions. This could be the case, if we have
    # the Flatten layer at the end.
    if self.num_outputs is None:
        # Create a B=1 dummy sample and push it through out conv-net.
        dummy_in = (
            torch.from_numpy(self.obs_space.sample())
            .permute(2, 0, 1)
            .unsqueeze(0)
            .float()
        )
        dummy_out = self._convs(dummy_in)
        self.num_outputs = dummy_out.shape[1]

    # Build the value layers
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        # Shared torso: value head is a single linear over the features.
        self._value_branch = SlimFC(
            out_channels, 1, initializer=normc_initializer(0.01), activation_fn=None
        )
    else:
        # Separate torso: mirror the conv stack, ending in 1 channel.
        vf_layers = []
        (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel, stride)
            vf_layers.append(
                SlimConv2d(
                    in_channels,
                    out_channels,
                    kernel,
                    stride,
                    padding,
                    activation_fn=activation,
                )
            )
            in_channels = out_channels
            in_size = out_size

        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,
                activation_fn=activation,
            )
        )

        vf_layers.append(
            SlimConv2d(
                in_channels=out_channels,
                out_channels=1,
                kernel=1,
                stride=1,
                padding=None,
                activation_fn=None,
            )
        )
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Torch vision network with optional trajectory-view framestacking.

    Same topology as the plain torch vision net (conv stack + logits /
    flatten + shared-or-separate value branch), but when
    `num_framestacks` is a concrete int > 1, the stacked frames become
    the conv input channels and obs/new_obs view requirements are
    rewritten to deliver frame stacks via the trajectory-view API.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)

    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    activation = self.model_config.get("conv_activation")
    filters = self.model_config["conv_filters"]
    assert len(filters) > 0,\
        "Must provide at least 1 entry in `conv_filters`!"

    # Post FC net config.
    post_fcnet_hiddens = model_config.get("post_fcnet_hiddens", [])
    post_fcnet_activation = get_activation_fn(
        model_config.get("post_fcnet_activation"), framework="torch")

    no_final_linear = self.model_config.get("no_final_linear")
    vf_share_layers = self.model_config.get("vf_share_layers")

    # Whether the last layer is the output of a Flattened (rather than
    # a n x (1,1) Conv2D).
    self.last_layer_is_flattened = False
    self._logits = None
    self.traj_view_framestacking = False

    layers = []
    # Perform Atari framestacking via traj. view API.
    if model_config.get("num_framestacks") != "auto" and \
            model_config.get("num_framestacks", 0) > 1:
        # Framestacked: obs space is 2D (w, h); the stacked frames act
        # as the input channels.
        (w, h) = obs_space.shape
        in_channels = model_config["num_framestacks"]
        self.traj_view_framestacking = True
    else:
        # NOTE(review): assumes channels-last (w, h, c) obs -- confirm.
        (w, h, in_channels) = obs_space.shape

    in_size = [w, h]
    # All but the last filter spec: "same"-padded convs.
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = same_padding(in_size, kernel, [stride, stride])
        layers.append(
            SlimConv2d(in_channels,
                       out_channels,
                       kernel,
                       stride,
                       padding,
                       activation_fn=activation))
        in_channels = out_channels
        in_size = out_size

    out_channels, kernel, stride = filters[-1]

    # No final linear: Last layer has activation function and exits with
    # num_outputs nodes (this could be a 1x1 conv or a FC layer, depending
    # on `post_fcnet_...` settings).
    if no_final_linear and num_outputs:
        out_channels = out_channels if post_fcnet_hiddens else num_outputs
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))

        # Add (optional) post-fc-stack after last Conv2D layer.
        layer_sizes = post_fcnet_hiddens[:-1] + (
            [num_outputs] if post_fcnet_hiddens else [])
        for i, out_size in enumerate(layer_sizes):
            layers.append(
                SlimFC(in_size=out_channels,
                       out_size=out_size,
                       activation_fn=post_fcnet_activation,
                       initializer=normc_initializer(1.0)))
            out_channels = out_size

    # Finish network normally (w/o overriding last layer size with
    # `num_outputs`), then add another linear one of size `num_outputs`.
    else:
        layers.append(
            SlimConv2d(
                in_channels,
                out_channels,
                kernel,
                stride,
                None,  # padding=valid
                activation_fn=activation))

        # num_outputs defined. Use that to create an exact
        # `num_output`-sized (1,1)-Conv2D.
        if num_outputs:
            # Spatial size after the final "valid" conv.
            in_size = [
                np.ceil((in_size[0] - kernel[0]) / stride),
                np.ceil((in_size[1] - kernel[1]) / stride)
            ]
            padding, _ = same_padding(in_size, [1, 1], [1, 1])
            if post_fcnet_hiddens:
                layers.append(nn.Flatten())
                in_size = out_channels
                # Add (optional) post-fc-stack after last Conv2D layer.
                for i, out_size in enumerate(post_fcnet_hiddens +
                                             [num_outputs]):
                    layers.append(
                        SlimFC(in_size=in_size,
                               out_size=out_size,
                               activation_fn=post_fcnet_activation
                               if i < len(post_fcnet_hiddens) - 1 else None,
                               initializer=normc_initializer(1.0)))
                    in_size = out_size
                # Last layer is logits layer.
                self._logits = layers.pop()
            else:
                self._logits = SlimConv2d(out_channels,
                                          num_outputs, [1, 1],
                                          1,
                                          padding,
                                          activation_fn=None)
        # num_outputs not known -> Flatten, then set self.num_outputs
        # to the resulting number of nodes.
        else:
            self.last_layer_is_flattened = True
            layers.append(nn.Flatten())
            # NOTE(review): sets num_outputs to the channel count only --
            # assumes the final "valid" conv collapses spatial dims to
            # 1x1; confirm against the filter configs used.
            self.num_outputs = out_channels

    self._convs = nn.Sequential(*layers)

    # Build the value layers
    self._value_branch_separate = self._value_branch = None
    if vf_share_layers:
        # Shared torso: single linear value head over the features.
        self._value_branch = SlimFC(out_channels,
                                    1,
                                    initializer=normc_initializer(0.01),
                                    activation_fn=None)
    else:
        # Separate torso: mirror the conv stack, ending in 1 channel.
        vf_layers = []
        if self.traj_view_framestacking:
            (w, h) = obs_space.shape
            in_channels = model_config["num_framestacks"]
        else:
            (w, h, in_channels) = obs_space.shape
        in_size = [w, h]
        for out_channels, kernel, stride in filters[:-1]:
            padding, out_size = same_padding(in_size, kernel,
                                             [stride, stride])
            vf_layers.append(
                SlimConv2d(in_channels,
                           out_channels,
                           kernel,
                           stride,
                           padding,
                           activation_fn=activation))
            in_channels = out_channels
            in_size = out_size
        out_channels, kernel, stride = filters[-1]
        vf_layers.append(
            SlimConv2d(in_channels,
                       out_channels,
                       kernel,
                       stride,
                       None,
                       activation_fn=activation))
        vf_layers.append(
            SlimConv2d(in_channels=out_channels,
                       out_channels=1,
                       kernel=1,
                       stride=1,
                       padding=None,
                       activation_fn=None))
        self._value_branch_separate = nn.Sequential(*vf_layers)

    # Holds the current "base" output (before logits layer).
    self._features = None

    # Optional: framestacking obs/new_obs for Atari.
    if self.traj_view_framestacking:
        from_ = model_config["num_framestacks"] - 1
        # Deliver the last `num_framestacks` observations as OBS ...
        self.view_requirements[SampleBatch.OBS].shift = \
            "-{}:0".format(from_)
        self.view_requirements[SampleBatch.OBS].shift_from = -from_
        self.view_requirements[SampleBatch.OBS].shift_to = 0
        # ... and the window shifted by one as NEXT_OBS.
        self.view_requirements[SampleBatch.NEXT_OBS] = ViewRequirement(
            data_col=SampleBatch.OBS,
            shift="-{}:1".format(from_ - 1),
            space=self.view_requirements[SampleBatch.OBS].space,
        )
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Residual vision torso with two flattened conv heads.

    The first `conv_filters` entry becomes a plain conv+ReLU; every
    further non-final entry becomes a two-conv residual block. The last
    entry parameterizes two parallel "valid" conv heads off the shared
    torso: a policy head (`num_outputs_p` flat units) and a one-channel
    value head (`num_outputs_v` flat units). Both heads are flattened,
    so `last_layer_is_flattened` ends up True.
    """
    if not model_config.get("conv_filters"):
        model_config["conv_filters"] = get_filter_config(obs_space.shape)
    super(CustomVisionNetwork, self).__init__(obs_space, action_space,
                                              num_outputs, model_config,
                                              name)

    filters = self.model_config["conv_filters"]
    assert len(filters) > 0, \
        "Must provide at least 1 entry in `conv_filters`!"

    self.data_format = "channels_last"
    self.last_layer_is_flattened = False

    inputs = tf.keras.layers.Input(shape=obs_space.shape,
                                   name="observations")

    def _conv(x, n_filters, kernel, stride, pad, layer_name):
        # Shared Conv2D settings; names follow the historical numbering
        # scheme so checkpoints stay loadable.
        return tf.keras.layers.Conv2D(
            filters=n_filters,
            kernel_size=kernel,
            strides=(stride, stride),
            padding=pad,
            data_format="channels_last",
            name=layer_name)(x)

    # Shared residual torso.
    net = inputs
    for idx, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
        if idx == 1:
            net = _conv(net, out_size, kernel, stride, "same",
                        f"conv{idx}")
            net = tf.keras.layers.ReLU()(net)
            continue
        shortcut = net
        net = _conv(net, out_size, kernel, stride, "same",
                    f"conv{idx * 2 - 2}")
        net = tf.keras.layers.ReLU()(net)
        net = _conv(net, out_size, kernel, stride, "same",
                    f"conv{idx * 2 - 1}")
        net = tf.keras.layers.Add()([shortcut, net])
        net = tf.keras.layers.ReLU()(net)

    out_size, kernel, stride = filters[-1]
    n_convs = len(filters)

    # Policy head.
    policy_head = _conv(net, out_size, kernel, stride, "valid",
                        f"conv{2 * n_convs}")
    policy_head = tf.keras.layers.ReLU()(policy_head)
    # Value head (single channel).
    value_head = _conv(net, 1, kernel, stride, "valid",
                       f"conv{2 * n_convs + 1}")
    value_head = tf.keras.layers.ReLU()(value_head)

    policy_head = tf.keras.layers.Flatten(
        data_format="channels_last")(policy_head)
    value_head = tf.keras.layers.Flatten(
        data_format="channels_last")(value_head)
    self.last_layer_is_flattened = True

    # Flat head widths, read by callers.
    self.num_outputs_p = policy_head.shape[1]
    self.num_outputs_v = value_head.shape[1]
    self._value_out = value_head

    self.base_model = tf.keras.Model(inputs,
                                     [policy_head, self._value_out])
    self.base_model.summary()