def test_ortho_init(self):
    # Linear
    a = nn.Linear(2, 3)
    ortho_init(a, weight_scale=1000., constant_bias=10.)
    assert a.weight.max().item() > 100.
    assert np.allclose(a.bias.detach().numpy(), 10.)
    ortho_init(a, nonlinearity='relu')  # smoke test for the nonlinearity path

    # Conv2d
    a = nn.Conv2d(2, 3, 3)
    ortho_init(a, weight_scale=1000., constant_bias=10.)
    assert a.weight.max().item() > 100.
    assert np.allclose(a.bias.detach().numpy(), 10.)
    ortho_init(a, nonlinearity='relu')

    # LSTM
    a = nn.LSTM(2, 3, 2)
    ortho_init(a, weight_scale=1000., constant_bias=10.)
    assert a.weight_hh_l0.max().item() > 100.
    assert a.weight_hh_l1.max().item() > 100.
    assert a.weight_ih_l0.max().item() > 100.
    assert a.weight_ih_l1.max().item() > 100.
    assert np.allclose(a.bias_hh_l0.detach().numpy(), 10.)
    assert np.allclose(a.bias_hh_l1.detach().numpy(), 10.)
    assert np.allclose(a.bias_ih_l0.detach().numpy(), 10.)
    assert np.allclose(a.bias_ih_l1.detach().numpy(), 10.)
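# For reference, a minimal sketch of how `ortho_init` could be implemented so that
# the test above passes. This is an assumption: the actual definition is not part
# of this section, and the real signature/behavior may differ.
import torch.nn as nn

def ortho_init(module, nonlinearity=None, weight_scale=1.0, constant_bias=0.0):
    # Gain: derived from the nonlinearity if given, otherwise the raw weight_scale
    if nonlinearity is not None:
        gain = nn.init.calculate_gain(nonlinearity)
    else:
        gain = weight_scale
    if isinstance(module, (nn.RNNBase, nn.RNNCellBase)):
        # Recurrent modules (e.g. LSTM) expose per-layer weight_*/bias_* parameters
        for name, param in module.named_parameters():
            if 'weight' in name:
                nn.init.orthogonal_(param, gain=gain)
            elif 'bias' in name:
                nn.init.constant_(param, constant_bias)
    else:  # e.g. nn.Linear, nn.Conv2d
        nn.init.orthogonal_(module.weight, gain=gain)
        nn.init.constant_(module.bias, constant_bias)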
def __init__(self, config, network, env_spec, learn_V=False, **kwargs):
    self.learn_V = learn_V
    super().__init__(config=config, network=network, env_spec=env_spec, **kwargs)

    assert self.env_spec.control_type == 'Discrete', 'expected Discrete control type'
    assert hasattr(self.network, 'last_feature_dim'), \
        'network expected to have an attribute `.last_feature_dim`'

    # Create action head
    action_head = nn.Linear(in_features=self.network.last_feature_dim,
                            out_features=self.env_spec.action_space.flat_dim)
    # Orthogonal initialization with small weight_scale=0.01, so the initial
    # action probabilities are near-uniform
    ortho_init(action_head, nonlinearity=None, weight_scale=0.01, constant_bias=0.0)
    # Augment to network (e.g. tracked by network.parameters() for optimizer to update)
    self.network.add_module('action_head', action_head)

    # Create value head if required
    if self.learn_V:
        value_head = nn.Linear(in_features=self.network.last_feature_dim, out_features=1)
        ortho_init(value_head, nonlinearity=None, weight_scale=1.0, constant_bias=0.0)
        self.network.add_module('value_head', value_head)
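# Hypothetical companion sketch (not from the source): how the `action_head` created
# above is typically consumed at forward time to form a Categorical action
# distribution. The names `features` and `_action_dist` are illustrative only.
import torch.nn.functional as F
from torch.distributions import Categorical

def _action_dist(self, features):
    logits = self.network.action_head(features)  # shape (N, num_actions)
    # With weight_scale=0.01 the logits start near zero, so softmax is near-uniform
    return Categorical(probs=F.softmax(logits, dim=-1))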
def __init__(self, config, network, env_spec, device, learn_V=False, **kwargs):
    super().__init__(config=config, network=network, env_spec=env_spec, device=device, **kwargs)
    self.learn_V = learn_V

    assert self.env_spec.control_type == 'Discrete', 'expected Discrete control type'
    assert hasattr(self.network, 'last_feature_dim'), \
        'network expected to have an attribute `.last_feature_dim`'

    # Create action head, orthogonal initialization and put onto device
    action_head = nn.Linear(in_features=self.network.last_feature_dim,
                            out_features=self.action_space.flat_dim)
    # weight_scale=0.01 -> near-uniform action probabilities initially
    ortho_init(action_head, nonlinearity=None, weight_scale=0.01, constant_bias=0.0)
    action_head = action_head.to(self.device)
    # Augment to network (e.g. tracked by network.parameters() for optimizer to update)
    self.network.add_module('action_head', action_head)

    # Create value head (if required), orthogonal initialization and put onto device
    if self.learn_V:
        value_head = nn.Linear(in_features=self.network.last_feature_dim, out_features=1)
        ortho_init(value_head, nonlinearity=None, weight_scale=1.0, constant_bias=0.0)
        value_head = value_head.to(self.device)
        self.network.add_module('value_head', value_head)

    # Initialize and track the RNN hidden states
    if self.recurrent:
        self.reset_rnn_states()
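# Assumed sketch of `reset_rnn_states` (its definition is not shown in this
# section): zero-initialize LSTM hidden/cell states on the policy's device.
# The attribute names `network.rnn` and `rnn_states` are illustrative.
import torch

def reset_rnn_states(self, batch_size=1):
    h = torch.zeros(self.network.rnn.num_layers, batch_size,
                    self.network.rnn.hidden_size, device=self.device)
    c = torch.zeros_like(h)  # LSTM also carries a cell state
    self.rnn_states = (h, c)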
def init_params(self, config):
    for layer in self.encoder:
        ortho_init(layer, nonlinearity='relu', constant_bias=0.0)
    ortho_init(self.mu_head, nonlinearity=None, weight_scale=0.01, constant_bias=0.0)
    ortho_init(self.logvar_head, nonlinearity=None, weight_scale=0.01, constant_bias=0.0)
    for layer in self.decoder:
        ortho_init(layer, nonlinearity='relu', constant_bias=0.0)
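# For context: the standard reparameterization step that the `mu_head`/`logvar_head`
# initialized above typically feed into in a VAE (a generic sketch, not code from
# this repository).
import torch

def reparameterize(mu, logvar):
    std = torch.exp(0.5*logvar)   # logvar -> std
    eps = torch.randn_like(std)   # noise ~ N(0, I)
    return mu + eps*std           # differentiable sample z ~ N(mu, std^2)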
def init_params(self, config):
    for layer in self.layers:
        ortho_init(layer, nonlinearity='relu', constant_bias=0.0)
def init_params(self, config):
    ortho_init(self.rnn, nonlinearity=None, weight_scale=1.0, constant_bias=0.0)
def __init__(self,
             config,
             network,
             env_spec,
             device,
             learn_V=False,
             min_std=1e-6,
             std_style='exp',
             constant_std=None,
             std_state_dependent=False,
             init_std=1.0,
             **kwargs):
    super().__init__(config=config, network=network, env_spec=env_spec, device=device, **kwargs)
    self.learn_V = learn_V
    # Record additional arguments
    self.min_std = min_std
    self.std_style = std_style
    self.constant_std = constant_std
    self.std_state_dependent = std_state_dependent
    self.init_std = init_std

    assert self.env_spec.control_type == 'Continuous', 'expected Continuous control type'
    assert hasattr(self.network, 'last_feature_dim'), \
        'network expected to have an attribute `.last_feature_dim`'
    if self.constant_std is not None:
        assert not self.std_state_dependent

    # Create mean head, orthogonal initialization and put onto device
    mean_head = nn.Linear(in_features=self.network.last_feature_dim,
                          out_features=self.action_space.flat_dim)
    # weight_scale=0.01 -> initial action means close to zero
    ortho_init(mean_head, nonlinearity=None, weight_scale=0.01, constant_bias=0.0)
    mean_head = mean_head.to(self.device)
    # Augment to network (e.g. tracked by network.parameters() for optimizer to update)
    self.network.add_module('mean_head', mean_head)

    # Create logvar head, orthogonal initialization and put onto device
    if self.constant_std is not None:  # using constant std
        if np.isscalar(self.constant_std):  # scalar
            logvar_head = torch.full(size=[self.env_spec.action_space.flat_dim],
                                     fill_value=torch.log(torch.tensor(self.constant_std)**2).item(),  # log(std**2), .item() since fill_value expects a Number
                                     requires_grad=False)  # no grad
        else:  # a numpy array
            logvar_head = torch.log(torch.from_numpy(np.array(self.constant_std)**2).float())
    else:  # no constant std, so learn it
        if self.std_state_dependent:  # state dependent, so a layer
            logvar_head = nn.Linear(in_features=self.network.last_feature_dim,
                                    out_features=self.env_spec.action_space.flat_dim)
            # weight_scale=0.01 -> logvar near zero, i.e. std close to 1.0 initially
            ortho_init(logvar_head, nonlinearity=None, weight_scale=0.01, constant_bias=0.0)
        else:  # state independent, so a learnable nn.Parameter
            assert self.init_std is not None, f'expected init_std given as a scalar value, got {self.init_std}'
            logvar_head = nn.Parameter(torch.full(size=[self.env_spec.action_space.flat_dim],
                                                  fill_value=torch.log(torch.tensor(self.init_std)**2).item(),
                                                  requires_grad=True))  # with grad
    if isinstance(logvar_head, nn.Parameter):
        # Note: `.to()` on an nn.Parameter returns a plain (non-leaf) tensor,
        # so move the underlying data and re-wrap to keep it learnable
        logvar_head = nn.Parameter(logvar_head.data.to(self.device), requires_grad=True)
    else:
        logvar_head = logvar_head.to(self.device)
    # Augment to network as module or as attribute
    if isinstance(logvar_head, nn.Linear):
        self.network.add_module('logvar_head', logvar_head)
    else:
        self.network.logvar_head = logvar_head

    # Create value head (if required), orthogonal initialization and put onto device
    if self.learn_V:
        value_head = nn.Linear(in_features=self.network.last_feature_dim, out_features=1)
        ortho_init(value_head, nonlinearity=None, weight_scale=1.0, constant_bias=0.0)
        value_head = value_head.to(self.device)
        self.network.add_module('value_head', value_head)

    # Initialize and track the RNN hidden states
    if self.recurrent:
        self.reset_rnn_states()
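# Hypothetical companion sketch (not from the source): how the heads above could be
# combined at forward time into a Normal action distribution, honoring `std_style`
# and `min_std`. The 'softplus' branch is an assumed alternative style.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Independent, Normal

def _action_dist(self, features):
    mean = self.network.mean_head(features)
    if isinstance(self.network.logvar_head, nn.Linear):  # state-dependent logvar
        logvar = self.network.logvar_head(features)
    else:  # constant or state-independent learnable logvar
        logvar = self.network.logvar_head.expand_as(mean)
    if self.std_style == 'exp':
        std = torch.exp(0.5*logvar)
    else:  # assumed: 'softplus'
        std = F.softplus(logvar)
    std = torch.clamp(std, min=self.min_std)  # numerical floor on std
    return Independent(Normal(loc=mean, scale=std), 1)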