def make_networks(self, config):
    """Create the feature network, the action head and, optionally, the value head.

    Config keys read here:
        - ``network.recurrent``: truthy selects ``RNN``, otherwise ``NN``, as
          ``self.feature_network``.
        - ``network.hidden_sizes``: its last entry is passed to every head as
          the feature dimension.
        - ``agent.min_std``, ``agent.std_style``, ``agent.constant_std``,
          ``agent.std_state_dependent``, ``agent.init_std``: forwarded to
          ``DiagGaussianHead`` (read only for ``'Continuous'`` control).
        - ``network.independent_V``: when falsy, ``self.V_head`` is created
          here; when truthy, no value head is created by this method.

    ``self.env_spec.control_type`` selects the action head: ``'Discrete'``
    gives a ``CategoricalHead``, ``'Continuous'`` gives a
    ``DiagGaussianHead``. For any other value ``self.action_head`` is left
    unset by this method.
    """
    # Both feature extractors are constructed with the same arguments.
    feature_cls = RNN if config['network.recurrent'] else NN
    self.feature_network = feature_cls(config, self.device, env_spec=self.env_spec)

    feature_dim = config['network.hidden_sizes'][-1]

    control_type = self.env_spec.control_type
    if control_type == 'Discrete':
        self.action_head = CategoricalHead(
            config, self.device, feature_dim, self.env_spec)
    elif control_type == 'Continuous':
        self.action_head = DiagGaussianHead(
            config,
            self.device,
            feature_dim,
            self.env_spec,
            min_std=config['agent.min_std'],
            std_style=config['agent.std_style'],
            constant_std=config['agent.constant_std'],
            std_state_dependent=config['agent.std_state_dependent'],
            init_std=config['agent.init_std'],
        )

    # An independent value network means no value head is attached here.
    if config['network.independent_V']:
        return
    self.V_head = StateValueHead(config, self.device, feature_dim)
def make_networks(self, config):
    """Create a fixed-size MLP featurizer plus categorical action and value heads.

    The featurizer maps ``self.env_spec.observation_space.flat_dim`` inputs
    through two 64-unit ReLU layers to a 50-dimensional feature vector. Both
    heads are built with that feature dimension. The layer sizes are
    hard-coded; ``config`` is only forwarded to the heads.
    """
    obs_dim = self.env_spec.observation_space.flat_dim
    feature_dim = 50

    # Layers are listed in forward order; Sequential indexes them 0..4.
    layers = [
        nn.Linear(obs_dim, 64),
        nn.ReLU(),
        nn.Linear(64, 64),
        nn.ReLU(),
        nn.Linear(64, feature_dim),
    ]
    self.featurize = nn.Sequential(*layers)

    self.action_head = CategoricalHead(
        config, self.device, feature_dim, self.env_spec)
    self.V_head = StateValueHead(config, self.device, feature_dim)
def make_params(self, config):
    """Create the fully connected feature layers, their layer norms and the value output.

    ``config['network.hidden_sizes']`` drives all three pieces: it is passed
    to ``make_fc`` together with the flattened observation dimension, one
    ``nn.LayerNorm`` is created per entry, and its last entry is the feature
    dimension given to ``StateValueHead``.
    """
    hidden_sizes = config['network.hidden_sizes']
    obs_dim = self.env_spec.observation_space.flat_dim

    self.feature_layers = make_fc(obs_dim, hidden_sizes)

    # One LayerNorm per hidden size, in the same order as hidden_sizes.
    norms = list(map(nn.LayerNorm, hidden_sizes))
    self.layer_norms = nn.ModuleList(norms)

    self.output_layer = StateValueHead(config, self.device, hidden_sizes[-1])
def make_params(self, config):
    """Run the parent's parameter setup, then attach a state-value output layer.

    The output layer is a ``StateValueHead`` whose feature dimension is the
    last entry of ``config['network.hidden_sizes']``.
    """
    super().make_params(config)

    feature_dim = config['network.hidden_sizes'][-1]
    value_head = StateValueHead(config, self.device, feature_dim)
    self.output_layer = value_head
def make_params(self, config):
    """Run the parent's parameter setup, then attach a state-value output layer.

    The output layer is a ``StateValueHead`` whose feature dimension is
    ``self.rnn.hidden_size``. ``self.rnn`` is read only after the parent
    call, so it is presumably created there (not visible in this block).
    """
    super().make_params(config)

    feature_dim = self.rnn.hidden_size
    value_head = StateValueHead(config, self.device, feature_dim)
    self.output_layer = value_head
def test_state_value_head():
    """Check StateValueHead's type, stored feature dimension and output shape.

    The head is built with ``None`` for both config and device and a feature
    dimension of 30; a batch of 3 random feature vectors must produce an
    output of shape ``(3, 1)``.
    """
    feature_dim, batch_size = 30, 3

    head = StateValueHead(None, None, feature_dim)
    assert isinstance(head, BaseNetwork)
    assert head.feature_dim == feature_dim

    values = head(torch.randn(batch_size, feature_dim))
    assert tuple(values.shape) == (batch_size, 1)