Example #1
    def test_ortho_init(self):
        # Linear
        a = nn.Linear(2, 3)
        ortho_init(a, weight_scale=1000., constant_bias=10.)
        assert a.weight.max().item() > 100.
        assert np.allclose(a.bias.detach().numpy(), 10.)
        ortho_init(a, nonlinearity='relu')

        # Conv2d
        a = nn.Conv2d(2, 3, 3)
        ortho_init(a, weight_scale=1000., constant_bias=10.)
        assert a.weight.max().item() > 100.
        assert np.allclose(a.bias.detach().numpy(), 10.)
        ortho_init(a, nonlinearity='relu')

        # LSTM
        a = nn.LSTM(2, 3, 2)
        ortho_init(a, weight_scale=1000., constant_bias=10.)
        assert a.weight_hh_l0.max().item() > 100.
        assert a.weight_hh_l1.max().item() > 100.
        assert a.weight_ih_l0.max().item() > 100.
        assert a.weight_ih_l1.max().item() > 100.
        assert np.allclose(a.bias_hh_l0.detach().numpy(), 10.)
        assert np.allclose(a.bias_hh_l1.detach().numpy(), 10.)
        assert np.allclose(a.bias_ih_l0.detach().numpy(), 10.)
        assert np.allclose(a.bias_ih_l1.detach().numpy(), 10.)
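
The assertions above pin down the contract of `ortho_init`: orthogonal weights scaled either by `weight_scale` or by the recommended gain for a given `nonlinearity`, biases filled with `constant_bias`, and recurrent modules initialized parameter by parameter. A minimal sketch consistent with that contract (an assumption for illustration, not necessarily the library's verbatim source; the snippets throughout assume `import numpy as np`, `import torch`, and `import torch.nn as nn`):

    import torch.nn as nn

    def ortho_init(module, nonlinearity=None, weight_scale=1.0, constant_bias=0.0):
        # Prefer the gain for the given nonlinearity; otherwise use the raw scale
        if nonlinearity is not None:
            gain = nn.init.calculate_gain(nonlinearity)
        else:
            gain = weight_scale
        if isinstance(module, (nn.RNNBase, nn.RNNCellBase)):
            # Recurrent modules: initialize each weight_ih_l*/weight_hh_l* and bias separately
            for name, param in module.named_parameters():
                if 'weight' in name:
                    nn.init.orthogonal_(param, gain=gain)
                elif 'bias' in name:
                    nn.init.constant_(param, constant_bias)
        else:
            # Linear/Conv layers: a single weight matrix and bias vector
            nn.init.orthogonal_(module.weight, gain=gain)
            nn.init.constant_(module.bias, constant_bias)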
Example #2
    def __init__(self, config, network, env_spec, learn_V=False, **kwargs):
        self.learn_V = learn_V

        super().__init__(config=config,
                         network=network,
                         env_spec=env_spec,
                         **kwargs)

        assert self.env_spec.control_type == 'Discrete', 'expected Discrete control type'
        assert hasattr(
            self.network, 'last_feature_dim'
        ), 'network expected to have an attribute `.last_feature_dim`'

        # Create action layer
        action_head = nn.Linear(
            in_features=self.network.last_feature_dim,
            out_features=self.env_spec.action_space.flat_dim)
        # Orthogonal initialization with a small scale (0.01), i.e. near-zero logits and a near-uniform initial policy
        ortho_init(action_head,
                   nonlinearity=None,
                   weight_scale=0.01,
                   constant_bias=0.0)
        # Attach to the network so it is tracked by network.parameters() for the optimizer to update
        self.network.add_module('action_head', action_head)

        # Create value layer if required
        if self.learn_V:
            value_head = nn.Linear(in_features=self.network.last_feature_dim,
                                   out_features=1)
            ortho_init(value_head,
                       nonlinearity=None,
                       weight_scale=1.0,
                       constant_bias=0.0)
            self.network.add_module('value_head', value_head)
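
A hedged sketch of how the `action_head` created above typically feeds a categorical policy; the forward pass is not part of this excerpt, and `features` / `action_dist` are illustrative names:

    from torch.distributions import Categorical

    def action_dist(self, features):
        # The small-scale (0.01) init above keeps these logits near zero,
        # so the initial policy is close to uniform over actions
        logits = self.network.action_head(features)
        return Categorical(logits=logits)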
Example #3
    def __init__(self,
                 config,
                 network,
                 env_spec,
                 device,
                 learn_V=False,
                 **kwargs):
        super().__init__(config=config,
                         network=network,
                         env_spec=env_spec,
                         device=device,
                         **kwargs)
        self.learn_V = learn_V

        assert self.env_spec.control_type == 'Discrete', 'expected Discrete control type'
        assert hasattr(
            self.network, 'last_feature_dim'
        ), 'network expected to have an attribute `.last_feature_dim`'

        # Create action head, apply orthogonal initialization, and move onto device
        action_head = nn.Linear(in_features=self.network.last_feature_dim,
                                out_features=self.action_space.flat_dim)
        ortho_init(action_head,
                   nonlinearity=None,
                   weight_scale=0.01,
                   constant_bias=0.0)  # small scale 0.01 -> near-uniform initial action distribution
        action_head = action_head.to(self.device)
        # Attach to the network so it is tracked by network.parameters() for the optimizer to update
        self.network.add_module('action_head', action_head)

        # Create value head (if required), apply orthogonal initialization, and move onto device
        if self.learn_V:
            value_head = nn.Linear(in_features=self.network.last_feature_dim,
                                   out_features=1)
            ortho_init(value_head,
                       nonlinearity=None,
                       weight_scale=1.0,
                       constant_bias=0.0)
            value_head = value_head.to(self.device)
            self.network.add_module('value_head', value_head)

        # Initialize and track the RNN hidden states
        if self.recurrent:
            self.reset_rnn_states()
Example #4
    def init_params(self, config):
        for layer in self.encoder:
            ortho_init(layer, nonlinearity='relu', constant_bias=0.0)

        ortho_init(self.mu_head,
                   nonlinearity=None,
                   weight_scale=0.01,
                   constant_bias=0.0)
        ortho_init(self.logvar_head,
                   nonlinearity=None,
                   weight_scale=0.01,
                   constant_bias=0.0)

        for layer in self.decoder:
            ortho_init(layer, nonlinearity='relu', constant_bias=0.0)
Example #5
    def init_params(self, config):
        for layer in self.layers:
            ortho_init(layer, nonlinearity='relu', constant_bias=0.0)
Example #6
    def init_params(self, config):
        ortho_init(self.rnn, nonlinearity=None, weight_scale=1.0, constant_bias=0.0)
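
Examples #4 to #6 follow the same pattern: an `init_params` hook that applies `ortho_init` to every layer once the module is built. A hedged usage sketch (the `MLP` class and its constructor are illustrative, not from the source):

    import torch.nn as nn

    class MLP(nn.Module):
        def __init__(self, config):
            super().__init__()
            self.layers = nn.ModuleList([nn.Linear(4, 32), nn.Linear(32, 32)])
            self.init_params(config)  # initialize once, right after construction

        def init_params(self, config):
            for layer in self.layers:
                ortho_init(layer, nonlinearity='relu', constant_bias=0.0)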
Example #7
    def __init__(self,
                 config,
                 network,
                 env_spec,
                 device,
                 learn_V=False,
                 min_std=1e-6,
                 std_style='exp',
                 constant_std=None,
                 std_state_dependent=False,
                 init_std=1.0,
                 **kwargs):
        super().__init__(config=config,
                         network=network,
                         env_spec=env_spec,
                         device=device,
                         **kwargs)
        self.learn_V = learn_V

        # Record additional arguments
        self.min_std = min_std
        self.std_style = std_style
        self.constant_std = constant_std
        self.std_state_dependent = std_state_dependent
        self.init_std = init_std

        assert self.env_spec.control_type == 'Continuous', 'expected Continuous control type'
        assert hasattr(
            self.network, 'last_feature_dim'
        ), 'network expected to have an attribute `.last_feature_dim`'
        if self.constant_std is not None:
            assert not self.std_state_dependent

        # Create mean head, apply orthogonal initialization, and move onto device
        mean_head = nn.Linear(in_features=self.network.last_feature_dim,
                              out_features=self.action_space.flat_dim)
        ortho_init(mean_head,
                   nonlinearity=None,
                   weight_scale=0.01,
                   constant_bias=0.0)  # small scale 0.01 -> action means start near zero
        mean_head = mean_head.to(self.device)
        # Attach to the network so it is tracked by network.parameters() for the optimizer to update
        self.network.add_module('mean_head', mean_head)

        # Create logvar head, apply orthogonal initialization, and move onto device
        if self.constant_std is not None:  # using constant std
            if np.isscalar(self.constant_std):  # scalar
                logvar_head = torch.full(
                    size=[self.env_spec.action_space.flat_dim],
                    fill_value=torch.log(torch.tensor(
                        self.constant_std)**2),  # log(std**2)
                    requires_grad=False)  # no grad
            else:  # a numpy array
                logvar_head = torch.log(
                    torch.from_numpy(np.array(self.constant_std)**2).float())
        else:  # no constant std, so learn it
            if self.std_state_dependent:  # state dependent, so a layer
                logvar_head = nn.Linear(
                    in_features=self.network.last_feature_dim,
                    out_features=self.env_spec.action_space.flat_dim)
                ortho_init(logvar_head,
                           nonlinearity=None,
                           weight_scale=0.01,
                           constant_bias=0.0)  # small scale 0.01 -> logvar near 0, i.e. std near 1
            else:  # state independent, so a learnable nn.Parameter
                assert self.init_std is not None, f'expected init_std to be a scalar value, got {self.init_std}'
                logvar_head = nn.Parameter(
                    torch.full(size=[self.env_spec.action_space.flat_dim],
                               fill_value=torch.log(
                                   torch.tensor(self.init_std)**2),
                               requires_grad=True))  # with grad
        # Move onto device; note that calling .to() on an nn.Parameter returns a plain
        # Tensor, so rewrap it to keep it registered as a learnable parameter
        if isinstance(logvar_head, nn.Parameter):
            logvar_head = nn.Parameter(logvar_head.data.to(self.device),
                                       requires_grad=logvar_head.requires_grad)
        else:
            logvar_head = logvar_head.to(self.device)
        # Attach to the network as a module or as a plain attribute
        if isinstance(logvar_head, nn.Linear):
            self.network.add_module('logvar_head', logvar_head)
        else:
            self.network.logvar_head = logvar_head

        # Create value head (if required), apply orthogonal initialization, and move onto device
        if self.learn_V:
            value_head = nn.Linear(in_features=self.network.last_feature_dim,
                                   out_features=1)
            ortho_init(value_head,
                       nonlinearity=None,
                       weight_scale=1.0,
                       constant_bias=0.0)
            value_head = value_head.to(self.device)
            self.network.add_module('value_head', value_head)

        # Initialize and track the RNN hidden states
        if self.recurrent:
            self.reset_rnn_states()
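
For context, a hedged sketch of how the mean/logvar heads above are typically consumed; the actual forward pass is not part of this excerpt, and `features` / `gaussian_dist` are illustrative names. With `std_style='exp'`, the head stores log(std**2), so std = exp(0.5 * logvar), floored at `min_std`:

    import torch
    import torch.nn as nn
    from torch.distributions import Normal

    def gaussian_dist(self, features):
        mean = self.network.mean_head(features)
        if isinstance(self.network.logvar_head, nn.Linear):
            logvar = self.network.logvar_head(features)  # state-dependent std
        else:
            logvar = self.network.logvar_head.expand_as(mean)  # constant or learned, state-independent
        std = torch.exp(0.5 * logvar)      # std_style == 'exp': logvar is log(std**2)
        std = std.clamp(min=self.min_std)  # numerical floor to avoid collapse
        return Normal(loc=mean, scale=std)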