def _build_q_net(self, name_):
    """Construct one Q-network head named `name_`.

    For continuous actions, the flattened observation is concatenated
    with the action vector at the input; for discrete actions only the
    observation is fed in and one Q-value per action is emitted.

    Args:
        name_: Prefix used for the sub-module names inside the returned
            `nn.Sequential`.

    Returns:
        An `nn.Sequential` Q-network.
    """
    # actions are concatenated with flattened obs
    critic_hidden_activation = self.model_config[
        "critic_hidden_activation"]
    critic_hiddens = self.model_config["critic_hiddens"]
    activation = get_activation_fn(critic_hidden_activation,
                                   framework="torch")
    q_net = nn.Sequential()
    # Input width: obs only (discrete) or obs + action (continuous).
    ins = (self.obs_ins
           if self._is_action_discrete else self.obs_ins + self.action_dim)
    for i, n in enumerate(critic_hiddens):
        q_net.add_module(
            f"{name_}_hidden_{i}",
            SlimFC(
                ins,
                n,
                initializer=torch.nn.init.xavier_uniform_,
                activation_fn=activation,
            ),
        )
        ins = n
    # Output layer: |A| Q-values for discrete actions, one scalar Q else.
    q_net.add_module(
        f"{name_}_out",
        SlimFC(
            ins,
            self.action_space.n if self._is_action_discrete else 1,
            initializer=torch.nn.init.xavier_uniform_,
            activation_fn=None,
        ),
    )
    return q_net
def __init__(self, num_states, num_actions, num_nodes=100):
    """Deep Q-network used for algorithm testing.

    Args:
        num_states: Number of input features.
        num_actions: Number of action-values to output, one per game
            action.
        num_nodes: Stored on the instance for compatibility; not used to
            size any layer here.
    """
    super(DQNModule, self).__init__()
    self.num_actions = num_actions
    self.num_nodes = num_nodes
    self.hidden_out_size = 32

    # Hidden trunk: 512 -> 128 -> 32, all ReLU-activated.
    hidden = []
    in_size = num_states
    for out_size in (512, 128, self.hidden_out_size):
        hidden.append(
            SlimFC(in_size=in_size,
                   out_size=out_size,
                   initializer=torch_normc_initializer(1.0),
                   activation_fn=nn.ReLU))
        in_size = out_size

    # Linear head mapping hidden features to per-action Q-values.
    self._value_branch = SlimFC(in_size=in_size,
                                out_size=num_actions,
                                initializer=torch_normc_initializer(1.0),
                                activation_fn=None)
    self._hidden_layers = nn.Sequential(*hidden)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Fully connected policy network with separate logits/value heads.

    Builds `fcnet_hiddens` SlimFC layers over the flattened observation,
    then a logits head (`num_outputs`) and a scalar value head.
    """
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    hiddens = model_config.get("fcnet_hiddens")
    activation = _get_activation_fn(model_config.get("fcnet_activation"))
    # Fix: the original dumped `model_config` via a stray print() to
    # stdout; use the module logger (consistent with the debug line).
    logger.debug("Constructing fcnet {} {} (model_config: {})".format(
        hiddens, activation, model_config))

    layers = []
    last_layer_size = np.product(obs_space.shape)
    for size in hiddens:
        layers.append(
            SlimFC(in_size=last_layer_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=activation))
        last_layer_size = size

    self._hidden_layers = nn.Sequential(*layers)
    # Small init std (0.01) on logits for near-uniform initial policy.
    self._logits = SlimFC(in_size=last_layer_size,
                          out_size=num_outputs,
                          initializer=normc_initializer(0.01),
                          activation_fn=None)
    self._value_branch = SlimFC(in_size=last_layer_size,
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Cached value-head output from the last forward pass.
    self._cur_value = None
def __init__(self, num_states=4, num_actions=18):
    """
    Initialize a deep Q-learning network for testing algorithm.

    num_states: number of features of input.
    num_actions: number of action-values to output, one-to-one
        correspondence to action in game.
    """
    super(DQNModule, self).__init__()
    # NOTE(review): relies on a module-level `dqn_config` for the CUDA
    # device id -- confirm it is defined before this class is constructed.
    self.device = torch.device(f"cuda:{dqn_config['cuda_id']}" if torch.
                               cuda.is_available() else "cpu")
    layers = []
    prev_layer_size = num_states
    self.num_actions = num_actions
    # Create layers 0 to second-last: 512 -> 256 -> 128 -> 64, all ReLU.
    self.hidden_out_size = 64
    for size in [512, 256, 128, self.hidden_out_size]:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=torch_normc_initializer(1.0),
                   activation_fn=nn.ReLU))
        prev_layer_size = size
        # Add a batch norm layer.
        # layers.append(nn.BatchNorm1d(prev_layer_size))
    # Linear head producing one Q-value per action.
    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=num_actions,
                                initializer=torch_normc_initializer(1.0),
                                activation_fn=None)
    self._hidden_layers = nn.Sequential(*layers)
def __init__(self, obs_space, action_space, num_outputs, model_config,
             name, **kwargs):
    """Test model with BatchNorm: two 256-unit ReLU layers, each followed
    by a BatchNorm1d, then separate logits and value heads."""
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)
    layers = []
    prev_layer_size = int(np.product(obs_space.shape))
    self._logits = None

    # Create layers 0 to second-last.
    for size in [256, 256]:
        layers.append(
            SlimFC(in_size=prev_layer_size,
                   out_size=size,
                   initializer=torch_normc_initializer(1.0),
                   activation_fn=nn.ReLU))
        prev_layer_size = size
        # Add a batch norm layer after every hidden layer.
        layers.append(nn.BatchNorm1d(prev_layer_size))

    self._logits = SlimFC(in_size=prev_layer_size,
                          out_size=self.num_outputs,
                          initializer=torch_normc_initializer(0.01),
                          activation_fn=None)
    self._value_branch = SlimFC(in_size=prev_layer_size,
                                out_size=1,
                                initializer=torch_normc_initializer(1.0),
                                activation_fn=None)
    self._hidden_layers = nn.Sequential(*layers)
    # Cached trunk output from the last forward pass.
    self._hidden_out = None
def __init__(self, obs_space, action_space, num_outputs, model_config,
             name, num_frames=3):
    """Small FC model consuming the last `num_frames` stacked observations."""
    nn.Module.__init__(self)
    super(TorchFrameStackingCartPoleModel, self).__init__(
        obs_space, action_space, None, model_config, name)

    self.num_frames = num_frames
    self.num_outputs = num_outputs

    # Flat observation spaces only; frames are concatenated feature-wise.
    assert len(obs_space.shape) == 1
    stacked_size = obs_space.shape[0] * self.num_frames

    # Construct actual (very simple) FC model.
    self.layer1 = SlimFC(in_size=stacked_size,
                         out_size=64,
                         activation_fn="relu")
    self.out = SlimFC(in_size=64,
                      out_size=self.num_outputs,
                      activation_fn="linear")
    self.values = SlimFC(in_size=64, out_size=1, activation_fn="linear")
    self._last_value = None

    # Ask RLlib for the trailing obs window and the previous reward.
    self.view_requirements["prev_n_obs"] = ViewRequirement(
        data_col="obs",
        shift="-{}:0".format(num_frames - 1),
        space=obs_space)
    self.view_requirements["prev_rewards"] = ViewRequirement(
        data_col="rewards", shift=-1)
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Transformer-encoder model over a sensor sequence, followed by two
    FC layers and separate action/value heads."""
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    custom_configs = model_config.get("custom_model_config")
    # Length of the sensor history fed to the encoder (default 10).
    self._sensor_seq_len = custom_configs.get("sensor_seq_len", 10)
    activation = model_config.get("fcnet_activation", "tanh")

    # 3-dim tokens, 3 attention heads, batch-first layout, 2 layers.
    encoder_layer = nn.TransformerEncoderLayer(d_model=3,
                                               nhead=3,
                                               batch_first=True,
                                               dim_feedforward=128)
    self._transformer_encoder = nn.TransformerEncoder(encoder_layer,
                                                      num_layers=2)
    self._all_fc1 = SlimFC(in_size=3,
                           out_size=64,
                           initializer=normc_initializer(1.0),
                           activation_fn=activation)
    self._all_fc2 = SlimFC(in_size=64,
                           out_size=16,
                           initializer=normc_initializer(1.0),
                           activation_fn=activation)
    self._action_layer = SlimFC(in_size=16,
                                out_size=num_outputs,
                                initializer=normc_initializer(0.01),
                                activation_fn=None)
    self._value_layer = SlimFC(in_size=16,
                               out_size=1,
                               initializer=normc_initializer(0.01),
                               activation_fn=None)
    # Cached features from the last forward pass (for value_function()).
    self._features = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Convolutional network: conv stack per `conv_filters`, then linear
    logits and value heads over the final conv's channel output."""
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    filters = model_config.get("conv_filters")
    if not filters:
        # Fall back to a default filter spec derived from the obs shape.
        filters = _get_filter_config(obs_space.shape)
    layers = []
    (w, h, in_channels) = obs_space.shape
    in_size = [w, h]
    # All but the last filter use computed padding.
    for out_channels, kernel, stride in filters[:-1]:
        padding, out_size = valid_padding(in_size, kernel,
                                          [stride, stride])
        layers.append(
            SlimConv2d(in_channels, out_channels, kernel, stride,
                       padding))
        in_channels = out_channels
        in_size = out_size
    # Last conv layer uses no padding.
    # NOTE(review): the FC heads take `out_channels` directly, so the
    # final feature map is presumably 1x1 -- confirm with the filter
    # config used.
    out_channels, kernel, stride = filters[-1]
    layers.append(
        SlimConv2d(in_channels, out_channels, kernel, stride, None))
    self._convs = nn.Sequential(*layers)

    self._logits = SlimFC(out_channels, num_outputs,
                          initializer=nn.init.xavier_uniform_)
    self._value_branch = SlimFC(out_channels, 1,
                                initializer=normc_initializer())
    # Cached value-head output from the last forward pass.
    self._cur_value = None
def build_q_net(name_):
    """Assemble a Q-head: embed projection, hidden stack, linear output."""
    act = get_activation_fn(critic_hidden_activation, framework="torch")
    init = nn.init.xavier_uniform_
    # For discrete actions, only obs.
    q_net = nn.Sequential()
    # Project the embedding onto the critic encoder's feature dim.
    q_net.add_module(
        "{}_hidden_{}".format(name_, "e"),
        SlimFC(embed_dim, self.critic_encoder.feature_dim,
               initializer=init, activation_fn=act))
    width = self.critic_encoder.feature_dim
    for idx, hidden_size in enumerate(critic_hiddens):
        q_net.add_module(
            "{}_hidden_{}".format(name_, idx),
            SlimFC(width, hidden_size, initializer=init,
                   activation_fn=act))
        width = hidden_size
    # Final linear layer producing the Q output(s).
    q_net.add_module(
        "{}_out".format(name_),
        SlimFC(width, q_outs, initializer=init, activation_fn=None))
    return q_net
def build_q_net(name_):
    """Q-net over concatenated (obs, action) producing one scalar Q."""
    activation = get_activation_fn(critic_hidden_activation,
                                   framework="torch")
    # For continuous actions: Feed obs and actions (concatenated)
    # through the NN. For discrete actions, only obs.
    q_net = nn.Sequential()
    width = self.obs_ins + self.action_dim
    for layer_idx, hidden_size in enumerate(critic_hiddens):
        q_net.add_module(
            "{}_hidden_{}".format(name_, layer_idx),
            SlimFC(
                width,
                hidden_size,
                initializer=torch.nn.init.xavier_uniform_,
                activation_fn=activation,
            ),
        )
        width = hidden_size
    # Single scalar Q-value output, no activation.
    q_net.add_module(
        "{}_out".format(name_),
        SlimFC(
            width,
            1,
            initializer=torch.nn.init.xavier_uniform_,
            activation_fn=None,
        ),
    )
    return q_net
def __init__(
    self,
    obs_space: gym.spaces.Space,
    fcnet_hiddens_per_candidate=(256, 32),
):
    """Initializes a QValueModel instance.

    Each document candidate receives one full Q-value stack, defined by
    `fcnet_hiddens_per_candidate`. The input to each of these Q-value
    stacks is always {[user] concat [document[i]] for i in
    document_candidates}.

    Extra model kwargs:
        fcnet_hiddens_per_candidate: List of layer-sizes for each(!) of
            the candidate documents.
    """
    super().__init__()

    self.orig_obs_space = obs_space
    self.embedding_size = self.orig_obs_space["doc"]["0"].shape[0]
    self.num_candidates = len(self.orig_obs_space["doc"])
    # User and document embeddings must have matching widths.
    assert self.orig_obs_space["user"].shape[0] == self.embedding_size

    # One independent Q stack per candidate document.
    self.q_nets = nn.ModuleList()
    for cand_idx in range(self.num_candidates):
        stack = nn.Sequential()
        width = 2 * self.embedding_size  # [user] concat [doc[i]]
        for layer_idx, hidden in enumerate(fcnet_hiddens_per_candidate):
            stack.add_module(
                f"q_layer_{cand_idx}_{layer_idx}",
                SlimFC(in_size=width, out_size=hidden,
                       activation_fn="relu"),
            )
            width = hidden
        stack.add_module(f"q_out_{cand_idx}",
                         SlimFC(width, 1, activation_fn=None))
        self.q_nets.append(stack)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """LSTM wrapper: runs an LSTM over the wrapped model's features and
    adds logits/value heads on the LSTM output."""
    nn.Module.__init__(self)
    super().__init__(obs_space, action_space, None, model_config, name)

    self.cell_size = model_config["lstm_cell_size"]
    self.use_prev_action_reward = model_config[
        "lstm_use_prev_action_reward"]
    self.action_dim = int(np.product(action_space.shape))
    # Add prev-action/reward nodes to input to LSTM.
    # NOTE(review): here `self.num_outputs` presumably holds the wrapped
    # model's feature width (set by the super ctor, which got
    # num_outputs=None) and serves as the LSTM input size -- confirm.
    if self.use_prev_action_reward:
        self.num_outputs += 1 + self.action_dim
    self.lstm = nn.LSTM(self.num_outputs, self.cell_size, batch_first=True)
    # Restore the externally visible output size.
    self.num_outputs = num_outputs
    # Postprocess LSTM output with another hidden layer and compute values.
    self._logits_branch = SlimFC(
        in_size=self.cell_size,
        out_size=self.num_outputs,
        activation_fn=None,
        initializer=torch.nn.init.xavier_uniform_)
    self._value_branch = SlimFC(
        in_size=self.cell_size,
        out_size=1,
        activation_fn=None,
        initializer=torch.nn.init.xavier_uniform_)
def __init__(
    self,
    in_dim: int,
    out_dim: int,
    num_heads: int,
    head_dim: int,
    input_layernorm: bool = False,
    output_activation: Union[str, callable] = None,
    **kwargs
):
    """Initializes a RelativeMultiHeadAttention nn.Module object.

    Args:
        in_dim: Input dimension (last axis of the incoming tensor).
        out_dim: The output dimension of this module. Also known as
            "attention dim".
        num_heads: The number of attention heads to use.
            Denoted `H` in [2].
        head_dim: The dimension of a single(!) attention head
            Denoted `D` in [2].
        input_layernorm: Whether to prepend a LayerNorm before
            everything else. Should be True for building a GTrXL.
        output_activation (Union[str, callable]): Optional activation
            function or activation function specifier (str). Should be
            "relu" for GTrXL.
        **kwargs: Forwarded to the parent constructor.
    """
    super().__init__(**kwargs)

    # No bias or non-linearity.
    self._num_heads = num_heads
    self._head_dim = head_dim

    # 3=Query, key, and value inputs.
    self._qkv_layer = SlimFC(
        in_size=in_dim, out_size=3 * num_heads * head_dim, use_bias=False
    )

    self._linear_layer = SlimFC(
        in_size=num_heads * head_dim,
        out_size=out_dim,
        use_bias=False,
        activation_fn=output_activation,
    )

    # Learned global biases (`u` and `v` in [2]). Assigning an
    # nn.Parameter attribute registers it with the module automatically,
    # so the explicit register_parameter() calls the original made were
    # redundant and have been removed.
    self._uvar = nn.Parameter(torch.zeros(num_heads, head_dim))
    self._vvar = nn.Parameter(torch.zeros(num_heads, head_dim))
    nn.init.xavier_uniform_(self._uvar)
    nn.init.xavier_uniform_(self._vvar)

    self._pos_proj = SlimFC(
        in_size=in_dim, out_size=num_heads * head_dim, use_bias=False
    )
    self._rel_pos_embedding = RelativePositionEmbedding(out_dim)

    self._input_layernorm = None
    if input_layernorm:
        self._input_layernorm = torch.nn.LayerNorm(in_dim)
def __init__(self, input_size, fe_hidden_sizes=(128,),
             cls_hidden_sizes=(128, 64)):
    """Shared feature extractor plus a pairwise classifier head.

    Args:
        input_size: Number of input features of a single sample.
        fe_hidden_sizes: Hidden-layer sizes of the feature extractor.
        cls_hidden_sizes: Hidden-layer sizes of the classifier that
            operates on the concatenated features of two samples.

    Note: defaults are tuples rather than lists to avoid the
    mutable-default-argument pitfall; callers passing lists still work.
    """
    super().__init__()
    assert len(fe_hidden_sizes) > 0
    assert len(cls_hidden_sizes) > 0

    # Feature extractor: ReLU MLP over a single sample.
    layers = []
    for size in fe_hidden_sizes:
        layers.append(
            SlimFC(in_size=input_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=nn.ReLU))
        input_size = size
    self.feature_extractor = nn.Sequential(*layers)

    # Concatenate the features from the two samples.
    input_size = fe_hidden_sizes[-1] * 2
    layers = []
    for size in cls_hidden_sizes:
        layers.append(
            SlimFC(in_size=input_size,
                   out_size=size,
                   initializer=normc_initializer(1.0),
                   activation_fn=nn.ReLU))
        input_size = size
    # Single scalar output for the pair.
    layers.append(
        SlimFC(in_size=input_size,
               out_size=1,
               initializer=normc_initializer(1.0)))
    self.classifier = nn.Sequential(*layers)
def __init__(self, obs_space, action_space, num_outputs, model_config,
             name, cnn_shape):
    """MobileNetV2 visual encoder -> LSTM -> logits/value heads.

    Args:
        cnn_shape: (w, h, c) shape of the visual input fed to the CNN.
    """
    super().__init__(obs_space, action_space, num_outputs, model_config,
                     name)

    self.lstm_state_size = 16
    self.cnn_shape = list(cnn_shape)
    self.visual_size_in = cnn_shape[0] * cnn_shape[1] * cnn_shape[2]
    # MobileNetV2 has a flat output of (1000,).
    self.visual_size_out = 1000

    # Load the MobileNetV2 from torch.hub. NOTE: this may download
    # pretrained weights on first use (network access required).
    self.cnn_model = torch.hub.load("pytorch/vision:v0.6.0",
                                    "mobilenet_v2",
                                    pretrained=True)

    self.lstm = nn.LSTM(self.visual_size_out, self.lstm_state_size,
                        batch_first=True)

    # Postprocess LSTM output with another hidden layer and compute values.
    self.logits = SlimFC(self.lstm_state_size, self.num_outputs)
    self.value_branch = SlimFC(self.lstm_state_size, 1)
    # Holds the current "base" output (before logits layer).
    self._features = None
def __init__(self, observation_space, action_space, num_outputs,
             model_config, name):
    """Model with private first/last layers around a process-wide shared
    middle layer."""
    TorchModelV2.__init__(self, observation_space, action_space,
                          num_outputs, model_config, name)
    nn.Module.__init__(self)

    flat_obs = int(np.product(observation_space.shape))
    # Non-shared initial layer.
    self.first_layer = SlimFC(
        flat_obs,
        64,
        activation_fn=nn.ReLU,
        initializer=torch.nn.init.xavier_uniform_)
    # Non-shared final layer.
    self.last_layer = SlimFC(
        64,
        self.num_outputs,
        activation_fn=None,
        initializer=torch.nn.init.xavier_uniform_)
    # Scalar value head.
    self.vf = SlimFC(
        64,
        1,
        activation_fn=None,
        initializer=torch.nn.init.xavier_uniform_,
    )
    # Handle to the globally shared layer instance.
    self._global_shared_layer = TORCH_GLOBAL_SHARED_LAYER
    self._output = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Wraps a standard TorchFC policy model and adds a separate "safety"
    value branch over the flattened input.

    NOTE(review): `custom_input_space` is not a parameter of this ctor --
    it must come from the enclosing scope, and the passed-in `obs_space`
    is ignored; confirm that is intentional.
    """
    TorchModelV2.__init__(self, custom_input_space, action_space,
                          num_outputs, model_config, name)
    nn.Module.__init__(self)

    self.torch_sub_model = TorchFC(custom_input_space, action_space,
                                   num_outputs, model_config, name)

    prev_safe_layer_size = int(np.product(custom_input_space.shape))
    vf_layers = []
    activation = model_config.get("fcnet_activation")
    hiddens = [32]
    for size in hiddens:
        vf_layers.append(
            SlimFC(in_size=prev_safe_layer_size,
                   out_size=size,
                   activation_fn=activation,
                   initializer=normc_initializer(1.0)))
        prev_safe_layer_size = size
    # Scalar safety-value output.
    vf_layers.append(
        SlimFC(in_size=prev_safe_layer_size,
               out_size=1,
               initializer=normc_initializer(0.01),
               activation_fn=None))
    self.safe_branch_separate = nn.Sequential(*vf_layers)
    # Holds the last flattened input (for the separate safety branch).
    self.last_in = None
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Autoregressive action model for Tuple([Discrete(2), Discrete(2)]):
    a1 is computed from the obs context; a2 is conditioned on a1."""
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    if action_space != Tuple([Discrete(2), Discrete(2)]):
        raise ValueError(
            "This model only supports the [2, 2] action space")

    # Output of the model (normally 'logits', but for an autoregressive
    # dist this is more like a context/feature layer encoding the obs)
    self.context_layer = SlimFC(
        in_size=obs_space.shape[0],
        out_size=num_outputs,
        initializer=normc_init_torch(1.0),
        activation_fn=nn.Tanh,
    )

    # V(s)
    self.value_branch = SlimFC(
        in_size=num_outputs,
        out_size=1,
        initializer=normc_init_torch(0.01),
        activation_fn=None,
    )

    # P(a1 | obs)
    self.a1_logits = SlimFC(in_size=num_outputs,
                            out_size=2,
                            activation_fn=None,
                            initializer=normc_init_torch(0.01))

    class _ActionModel(nn.Module):
        # Inner module computing both action heads. In forward(),
        # `self_` is the inner instance, while `self` (via closure) is
        # the outer model -- a1_logits lives on the outer model.
        def __init__(self):
            nn.Module.__init__(self)
            self.a2_hidden = SlimFC(in_size=1,
                                    out_size=16,
                                    activation_fn=nn.Tanh,
                                    initializer=normc_init_torch(1.0))
            self.a2_logits = SlimFC(in_size=16,
                                    out_size=2,
                                    activation_fn=None,
                                    initializer=normc_init_torch(0.01))

        def forward(self_, ctx_input, a1_input):
            # a1 head uses the OUTER model's layer via the closure.
            a1_logits = self.a1_logits(ctx_input)
            a2_logits = self_.a2_logits(self_.a2_hidden(a1_input))
            return a1_logits, a2_logits

    # P(a2 | a1)
    # --note: typically you'd want to implement P(a2 | a1, obs) as follows:
    # a2_context = tf.keras.layers.Concatenate(axis=1)(
    #     [ctx_input, a1_input])
    self.action_module = _ActionModel()
    self._context = None
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Wrapper around a GTrXLNet sub-module adding logits/value heads."""
    nn.Module.__init__(self)
    super().__init__(obs_space, action_space, None, model_config, name)

    # Flattened action dimensionality, by action-space type.
    if isinstance(action_space, Discrete):
        self.action_dim = action_space.n
    elif isinstance(action_space, MultiDiscrete):
        self.action_dim = np.product(action_space.nvec)
    elif action_space.shape is not None:
        self.action_dim = int(np.product(action_space.shape))
    else:
        self.action_dim = int(len(action_space))

    cfg = model_config

    self.attention_dim = cfg["attention_dim"]

    # Construct GTrXL sub-module w/ num_outputs=None (so it does not
    # create a logits/value output; we'll do this ourselves in this wrapper
    # here).
    self.gtrxl = GTrXLNet(
        obs_space,
        action_space,
        None,
        model_config,
        "gtrxl",
        num_transformer_units=cfg["attention_num_transformer_units"],
        attention_dim=self.attention_dim,
        num_heads=cfg["attention_num_heads"],
        head_dim=cfg["attention_head_dim"],
        memory_inference=cfg["attention_memory_inference"],
        memory_training=cfg["attention_memory_training"],
        position_wise_mlp_dim=cfg["attention_position_wise_mlp_dim"],
        init_gru_gate_bias=cfg["attention_init_gru_gate_bias"],
    )

    # Set final num_outputs to correct value (depending on action space).
    self.num_outputs = num_outputs

    # Postprocess GTrXL output with another hidden layer and compute
    # values.
    self._logits_branch = SlimFC(
        in_size=self.attention_dim,
        out_size=self.num_outputs,
        activation_fn=None,
        initializer=torch.nn.init.xavier_uniform_)
    self._value_branch = SlimFC(
        in_size=self.attention_dim,
        out_size=1,
        activation_fn=None,
        initializer=torch.nn.init.xavier_uniform_)

    # Expose the sub-module's view requirements (e.g. memory ins/outs).
    self.view_requirements = self.gtrxl.view_requirements
def __init__(self):
    """Builds the a2 head: 1 -> 16 (tanh) -> 2 logits."""
    nn.Module.__init__(self)
    # Logits for the second action, conditioned on a1 via a2_hidden.
    self.a2_logits = SlimFC(in_size=16,
                            out_size=2,
                            activation_fn=None,
                            initializer=normc_init_torch(0.01))
    # Hidden layer embedding the (scalar) a1 input.
    self.a2_hidden = SlimFC(in_size=1,
                            out_size=16,
                            activation_fn=nn.Tanh,
                            initializer=normc_init_torch(1.0))
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """Card-game model: card-id embedding plus an FC trunk, with an
    optional parallel value trunk when vf_share_layers is False."""
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    # Nonlinearity for fully connected net (tanh, relu). Default: "tanh"
    activation = model_config.get("fcnet_activation")
    # Number of hidden layers for fully connected net. Default: [256, 256]
    hiddens = [256, 256]  # model_config.get("fcnet_hiddens", [])
    # Whether to skip the final linear layer used to resize the hidden layer
    # outputs to size `num_outputs`. If True, then the last hidden layer
    # should already match num_outputs.
    # no_final_linear = False
    self.vf_share_layers = model_config.get("vf_share_layers")
    self.free_log_std = False

    # Card-id embedding; vocabulary size inferred from the obs space's
    # highest value in its last column.
    self._embedd = nn.Embedding(
        int(obs_space.high[0][-1]) + 1, CARD_EMBEDD_SIZE)
    # Player Hot Encoded = 3 * Number of Cards Played per trick = 4
    # CARD_EMBEDD_SIZE * Number of Cards Played per trick = 4
    self._hidden_layers = self._build_hidden_layers(
        first_layer_size=FIRST_LAYER_SIZE,
        hiddens=hiddens,
        activation=activation)

    self._value_branch_separate = None
    self._value_embedding = None
    if not self.vf_share_layers:
        # Build a parallel set of hidden layers for the value net.
        self._value_embedding = nn.Embedding(
            int(obs_space.high[0][-1]) + 1, CARD_EMBEDD_SIZE)
        self._value_branch_separate = self._build_hidden_layers(
            first_layer_size=FIRST_LAYER_SIZE,
            hiddens=hiddens,
            activation=activation)

    self._logits = SlimFC(in_size=hiddens[-1],
                          out_size=num_outputs,
                          initializer=normc_initializer(0.01),
                          activation_fn=None)
    self._value_branch = SlimFC(in_size=hiddens[-1],
                                out_size=1,
                                initializer=normc_initializer(1.0),
                                activation_fn=None)
    # Holds the current "base" output (before logits layer).
    self._features = None
    # Holds the last input, in case value branch is separate.
    self._cards_in = None
    self._players_in = None
def __init__(self,
             in_dim,
             out_dim,
             num_heads,
             head_dim,
             rel_pos_encoder,
             input_layernorm=False,
             output_activation=None,
             **kwargs):
    """Initializes a RelativeMultiHeadAttention nn.Module object.

    Args:
        in_dim (int): Input dimension (last axis of incoming tensor).
        out_dim (int): Output dimension ("attention dim").
        num_heads (int): The number of attention heads to use.
            Denoted `H` in [2].
        head_dim (int): The dimension of a single(!) attention head
            Denoted `D` in [2].
        rel_pos_encoder: The relative positional encoder to use.
        input_layernorm (bool): Whether to prepend a LayerNorm before
            everything else. Should be True for building a GTrXL.
        output_activation (Optional[tf.nn.activation]): Optional tf.nn
            activation function. Should be relu for GTrXL.
        **kwargs: Forwarded to the parent constructor.
    """
    super().__init__(**kwargs)

    # No bias or non-linearity.
    self._num_heads = num_heads
    self._head_dim = head_dim
    # 3=Query, key, and value inputs.
    self._qkv_layer = SlimFC(
        in_size=in_dim, out_size=3 * num_heads * head_dim, use_bias=False)
    self._linear_layer = SlimFC(
        in_size=num_heads * head_dim,
        out_size=out_dim,
        use_bias=False,
        activation_fn=output_activation)
    self._pos_proj = SlimFC(
        in_size=in_dim, out_size=num_heads * head_dim, use_bias=False)

    # Fix: the learned global biases (`u`/`v` in [2]) were plain tensors,
    # so they were never registered as module parameters -- excluded from
    # optimizer steps and state_dict. Wrap them in nn.Parameter (matching
    # the newer sibling implementation of this class).
    self._uvar = nn.Parameter(torch.zeros(num_heads, head_dim))
    self._vvar = nn.Parameter(torch.zeros(num_heads, head_dim))
    nn.init.xavier_uniform_(self._uvar)
    nn.init.xavier_uniform_(self._vvar)

    self._rel_pos_encoder = rel_pos_encoder

    self._input_layernorm = None
    if input_layernorm:
        self._input_layernorm = torch.nn.LayerNorm(in_dim)
def __init__(self, device, num_states=4, num_actions=18):
    """Deep Q-network that embeds states and actions separately, then
    scores the pair with a joint MLP.

    Args:
        device: Torch device stored on the instance.
        num_states: Number of features of the state input.
        num_actions: Number of features of the action input.
    """
    super(DQNActionModule, self).__init__()
    self.device = device
    self.num_states = num_states
    self.num_actions = num_actions

    def make_stack(in_size, sizes):
        # Build a ReLU MLP layer list ending at sizes[-1].
        mods = []
        for out_size in sizes:
            mods.append(
                SlimFC(in_size=in_size,
                       out_size=out_size,
                       initializer=torch_normc_initializer(1.0),
                       activation_fn=nn.ReLU))
            in_size = out_size
        return mods

    # State branch: num_states -> 256 -> 128 -> 32.
    self.state_hidden_out_size = 32
    state_layers = make_stack(num_states,
                              [256, 128, self.state_hidden_out_size])
    # Action branch: num_actions -> 64 -> 32.
    self.action_hidden_out_size = 32
    action_layers = make_stack(num_actions,
                               [64, self.action_hidden_out_size])
    # Joint branch over concatenated state/action features: 64 -> 32.
    self.hidden_out_size = 32
    joint_in = self.state_hidden_out_size + self.action_hidden_out_size
    layers = make_stack(joint_in, [64, self.hidden_out_size])

    # Scalar Q-value head.
    self._value_branch = SlimFC(in_size=self.hidden_out_size,
                                out_size=1,
                                initializer=torch_normc_initializer(1.0),
                                activation_fn=None)
    self._state_hidden_layers = nn.Sequential(*state_layers)
    self._action_hidden_layers = nn.Sequential(*action_layers)
    self._hidden_layers = nn.Sequential(*layers)
def __init__(self, num_states, num_actions, dqn_config):
    """
    Initialize a deep Q-learning network for testing algorithm.

    num_states: number of features of input.
    num_actions: number of action-values to output, one-to-one
        correspondence to action in game.
    dqn_config: dict providing at least 'cuda_id' for device selection.
    """
    super(DQNTransitionModule, self).__init__()
    self.num_states = num_states
    self.num_actions = num_actions
    # Pick the configured GPU when available, else fall back to CPU.
    self.device = torch.device(f"cuda:{dqn_config['cuda_id']}"
                               if torch.cuda.is_available() else "cpu")

    # State embedding: num_states -> 128 -> 64 -> 32 (ReLU).
    self.state_emb_model = None
    layers = []
    prev_layer_size = num_states
    self.state_emb_size = 32
    for i, size in enumerate([128, 64, self.state_emb_size]):
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        prev_layer_size = size
    self.state_emb_model = nn.Sequential(*layers).to(self.device)

    # Action embedding: num_actions -> 64 -> 16 (ReLU).
    self.action_emb_model = None
    layers = []
    prev_layer_size = num_actions
    self.action_emb_size = 16
    for i, size in enumerate([64, self.action_emb_size]):
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        prev_layer_size = size
    self.action_emb_model = nn.Sequential(*layers).to(self.device)

    # Transition embedding over concat(state_emb, action_emb):
    # 48 -> 128 -> 32 (ReLU); output width matches the state embedding.
    self.transition_emb_model = None
    layers = []
    prev_layer_size = self.state_emb_size + self.action_emb_size
    self.transition_emb_size = self.state_emb_size
    for i, size in enumerate([128, self.transition_emb_size]):
        layers.append(
            SlimFC(
                in_size=prev_layer_size,
                out_size=size,
                initializer=torch_normc_initializer(1.0),
                activation_fn=nn.ReLU))
        prev_layer_size = size
    self.transition_emb_model = nn.Sequential(*layers).to(self.device)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Four stride-2 convolutions followed by a two-layer FC head."""
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    # Conv stack: obs channels -> 32, then three 32 -> 32 layers
    # (kernel 3, stride 2, padding 1 each).
    self.conv1 = SlimConv2d(obs_space.shape[0], 32, 3, 2, 1)
    self.conv2 = SlimConv2d(32, 32, 3, 2, 1)
    self.conv3 = SlimConv2d(32, 32, 3, 2, 1)
    self.conv4 = SlimConv2d(32, 32, 3, 2, 1)
    # Flattened conv output: 32 channels on a 6x6 feature map.
    self.fc1 = SlimFC(32 * 6 * 6, 512)
    self.fc_out = SlimFC(512, num_outputs)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """LSTM wrapper that also declares its own view requirements
    (obs, prev-rewards/actions, and the two LSTM state tensors)."""
    nn.Module.__init__(self)
    super().__init__(obs_space, action_space, None, model_config, name)

    self.cell_size = model_config["lstm_cell_size"]
    self.time_major = model_config.get("_time_major", False)
    self.use_prev_action_reward = model_config[
        "lstm_use_prev_action_reward"]
    self.action_dim = int(np.product(action_space.shape))
    # Add prev-action/reward nodes to input to LSTM.
    # NOTE(review): here `self.num_outputs` presumably holds the wrapped
    # model's feature width (the super ctor got num_outputs=None) and is
    # used as the LSTM input size -- confirm.
    if self.use_prev_action_reward:
        self.num_outputs += 1 + self.action_dim
    self.lstm = nn.LSTM(self.num_outputs, self.cell_size,
                        batch_first=not self.time_major)
    # Restore the externally visible output size.
    self.num_outputs = num_outputs

    # Postprocess LSTM output with another hidden layer and compute values.
    self._logits_branch = SlimFC(in_size=self.cell_size,
                                 out_size=self.num_outputs,
                                 activation_fn=None,
                                 initializer=torch.nn.init.xavier_uniform_)
    self._value_branch = SlimFC(in_size=self.cell_size,
                                out_size=1,
                                activation_fn=None,
                                initializer=torch.nn.init.xavier_uniform_)

    # Request current obs plus rewards/actions shifted by -1 at inference.
    self.inference_view_requirements.update(
        dict(
            **{
                SampleBatch.OBS: ViewRequirement(shift=0),
                SampleBatch.PREV_REWARDS: ViewRequirement(
                    SampleBatch.REWARDS, shift=-1),
                SampleBatch.PREV_ACTIONS: ViewRequirement(
                    SampleBatch.ACTIONS, space=self.action_space,
                    shift=-1),
            }))
    # Two LSTM state tensors (h and c): each state_in_i reads the
    # previous step's state_out_i.
    for i in range(2):
        self.inference_view_requirements["state_in_{}".format(i)] = \
            ViewRequirement(
                "state_out_{}".format(i),
                shift=-1,
                space=Box(-1.0, 1.0, shape=(self.cell_size,)))
        self.inference_view_requirements["state_out_{}".format(i)] = \
            ViewRequirement(
                space=Box(-1.0, 1.0, shape=(self.cell_size,)))
def __init__(self, obs_space: gym.spaces.Space,
             action_space: gym.spaces.Space, num_outputs: int,
             model_config: ModelConfigDict, name: str):
    """LSTM wrapper with separate prev-action / prev-reward switches."""
    nn.Module.__init__(self)
    super().__init__(obs_space, action_space, None, model_config, name)

    self.cell_size = model_config["lstm_cell_size"]
    self.time_major = model_config.get("_time_major", False)
    self.use_prev_action = model_config["lstm_use_prev_action"]
    self.use_prev_reward = model_config["lstm_use_prev_reward"]

    # Flattened action dimensionality, by action-space type.
    if isinstance(action_space, Discrete):
        self.action_dim = action_space.n
    elif isinstance(action_space, MultiDiscrete):
        self.action_dim = np.product(action_space.nvec)
    elif action_space.shape is not None:
        self.action_dim = int(np.product(action_space.shape))
    else:
        self.action_dim = int(len(action_space))

    # Add prev-action/reward nodes to input to LSTM.
    # NOTE(review): here `self.num_outputs` presumably holds the wrapped
    # model's feature width (the super ctor got num_outputs=None) and is
    # used as the LSTM input size -- confirm.
    if self.use_prev_action:
        self.num_outputs += self.action_dim
    if self.use_prev_reward:
        self.num_outputs += 1

    self.lstm = nn.LSTM(self.num_outputs, self.cell_size,
                        batch_first=not self.time_major)

    # Restore the externally visible output size.
    self.num_outputs = num_outputs

    # Postprocess LSTM output with another hidden layer and compute values.
    self._logits_branch = SlimFC(in_size=self.cell_size,
                                 out_size=self.num_outputs,
                                 activation_fn=None,
                                 initializer=torch.nn.init.xavier_uniform_)
    self._value_branch = SlimFC(in_size=self.cell_size,
                                out_size=1,
                                activation_fn=None,
                                initializer=torch.nn.init.xavier_uniform_)

    # Add prev-a/r to this model's view, if required.
    if model_config["lstm_use_prev_action"]:
        self.inference_view_requirements[SampleBatch.PREV_ACTIONS] = \
            ViewRequirement(SampleBatch.ACTIONS, space=self.action_space,
                            data_rel_pos=-1)
    if model_config["lstm_use_prev_reward"]:
        self.inference_view_requirements[SampleBatch.PREV_REWARDS] = \
            ViewRequirement(SampleBatch.REWARDS, data_rel_pos=-1)
def __init__(self, observation_space, action_space, num_outputs,
             model_config, name):
    """Tiny model: non-shared first/last layers plus a value head."""
    TorchModelV2.__init__(self, observation_space, action_space,
                          num_outputs, model_config, name)
    nn.Module.__init__(self)

    obs_flat = int(np.product(observation_space.shape))
    # Non-shared initial layer.
    self.first_layer = SlimFC(obs_flat, 32, activation_fn=nn.ReLU)
    # Non-shared final layer (note: ReLU-activated logits, as in the
    # original).
    self.last_layer = SlimFC(32, self.num_outputs, activation_fn=nn.ReLU)
    # Scalar value head.
    self.vf = SlimFC(32, 1, activation_fn=None)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """Policy model plus a centralized value function over both agents."""
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    # Base of the model.
    self.model = TorchFC(obs_space, action_space, num_outputs,
                         model_config, name)

    # Central VF maps (obs, opp_obs, opp_act) -> vf_pred.
    obs_size, opp_obs_size, opp_act_size = 6, 6, 2
    input_size = obs_size + opp_obs_size + opp_act_size
    self.central_vf_dense = SlimFC(input_size, 16, activation_fn=nn.Tanh)
    self.central_vf_out = SlimFC(16, 1)
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
    """LSTM model emitting one grab-location plus seven pose outputs."""
    TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                          model_config, name)
    nn.Module.__init__(self)

    self.size_in = 3
    self.lstm_state_size = 64
    self.grab_location = 1
    self.lstm = nn.LSTM(self.size_in, self.lstm_state_size,
                        batch_first=True)
    # one grab location + seven (position + translation)
    self.num_outputs = self.grab_location + 7

    # Postprocess LSTM output with another hidden layer and compute values.
    self.linear = SlimFC(self.lstm_state_size, self.num_outputs,
                         activation_fn="tanh")
    self.value_branch = SlimFC(self.lstm_state_size, 1,
                               activation_fn=None)
    # Cached LSTM features from the last forward pass.
    self._features = None