コード例 #1
0
ファイル: agent.py プロジェクト: zuoxingdong/lagom
    def __init__(self, config, env, device, **kwargs):
        super().__init__(config, env, device, **kwargs)

        feature_dim = config['nn.sizes'][-1]
        self.feature_network = MLP(config, env, device, **kwargs)
        if isinstance(env.action_space, Discrete):
            self.action_head = CategoricalHead(feature_dim, env.action_space.n,
                                               device, **kwargs)
        elif isinstance(env.action_space, Box):
            self.action_head = DiagGaussianHead(feature_dim,
                                                flatdim(env.action_space),
                                                device, config['agent.std0'],
                                                **kwargs)
        self.V_head = nn.Linear(feature_dim, 1)
        ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)
        self.V_head = self.V_head.to(
            device
        )  # reproducible between CPU/GPU, ortho_init behaves differently

        self.register_buffer('total_timestep', torch.tensor(0))
        #self.total_timestep = 0

        self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
        if config['agent.use_lr_scheduler']:
            self.lr_scheduler = linear_lr_scheduler(self.optimizer,
                                                    config['train.timestep'],
                                                    min_lr=1e-8)
        self.gamma = config['agent.gamma']
        self.clip_rho = config['agent.clip_rho']
        self.clip_pg_rho = config['agent.clip_pg_rho']
コード例 #2
0
ファイル: agent.py プロジェクト: ludwigwinkler/lagom
    def __init__(self, config, env, device, **kwargs):
        super().__init__(config, env, device, **kwargs)

        feature_dim = config['nn.sizes'][-1]
        self.feature_network = MLP(config, env, device, **kwargs)
        if isinstance(env.action_space, Discrete):
            self.action_head = CategoricalHead(feature_dim, env.action_space.n,
                                               device, **kwargs)
        elif isinstance(env.action_space, Box):
            self.action_head = DiagGaussianHead(feature_dim,
                                                flatdim(env.action_space),
                                                device, config['agent.std0'],
                                                config['agent.std_style'],
                                                config['agent.std_range'],
                                                config['agent.beta'], **kwargs)
        self.V_head = nn.Linear(feature_dim, 1).to(device)
        ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)

        self.total_timestep = 0

        self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
        if config['agent.use_lr_scheduler']:
            self.lr_scheduler = linear_lr_scheduler(self.optimizer,
                                                    config['train.timestep'],
                                                    min_lr=1e-8)
コード例 #3
0
ファイル: diag_gaussian_head.py プロジェクト: vin136/lagom
 def init_params(self, config):
     # 0.01->almost zeros initially
     ortho_init(self.mean_head, weight_scale=0.01, constant_bias=0.0)
     
     if isinstance(self.logstd_head, nn.Linear):
         # 0.01->small std
         ortho_init(self.logstd_head, weight_scale=0.01, constant_bias=0.0)
コード例 #4
0
ファイル: agent.py プロジェクト: LorinChen/lagom
 def __init__(self, config, env, device, **kwargs):
     super().__init__(**kwargs)
     self.config = config
     self.env = env
     self.device = device
     
     self.feature_layers = make_fc(spaces.flatdim(env.observation_space), config['nn.sizes'])
     for layer in self.feature_layers:
         ortho_init(layer, nonlinearity='relu', constant_bias=0.0)
     self.layer_norms = nn.ModuleList([nn.LayerNorm(hidden_size) for hidden_size in config['nn.sizes']])
     
     self.to(self.device)
コード例 #5
0
ファイル: agent.py プロジェクト: ludwigwinkler/lagom
    def __init__(self, config, env, device, **kwargs):
        super().__init__(**kwargs)
        self.config = config
        self.env = env
        self.device = device

        self.feature_layers = make_fc(flatdim(env.observation_space),
                                      config['nn.sizes'])
        for layer in self.feature_layers:
            ortho_init(layer, nonlinearity='tanh', constant_bias=0.0)

        self.to(self.device)
コード例 #6
0
ファイル: diag_gaussian_head.py プロジェクト: lewisKit/lagom
    def init_params(self, config):
        # 0.01->almost zeros initially
        ortho_init(self.mean_head,
                   nonlinearity=None,
                   weight_scale=0.01,
                   constant_bias=0.0)

        if isinstance(self.logvar_head, nn.Linear):
            # 0.01->almost 1.0 std
            ortho_init(self.logvar_head,
                       nonlinearity=None,
                       weight_scale=0.01,
                       constant_bias=0.0)
コード例 #7
0
ファイル: network.py プロジェクト: vin136/lagom
    def init_params(self, config):
        for layer in self.feature_layers:
            ortho_init(layer, nonlinearity='tanh', constant_bias=0.0)

        ortho_init(self.pi_head, weight_scale=0.01, constant_bias=0.0)
        ortho_init(self.mean_head, weight_scale=0.01, constant_bias=0.0)
        ortho_init(self.logvar_head, weight_scale=0.01, constant_bias=0.0)
コード例 #8
0
    def __init__(self, config, env, device, **kwargs):
        super().__init__(**kwargs)
        self.config = config
        self.env = env
        self.device = device

        self.feature_layers = make_fc(flatdim(env.observation_space),
                                      config['nn.sizes'])
        for layer in self.feature_layers:
            ortho_init(layer, nonlinearity='tanh', constant_bias=0.0)

        feature_dim = config['nn.sizes'][-1]
        self.V_head = nn.Linear(feature_dim, 1)
        ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)

        self.to(self.device)
コード例 #9
0
    def __init__(self, feature_dim, action_dim, device, **kwargs):
        super().__init__(**kwargs)
        self.feature_dim = feature_dim
        self.action_dim = action_dim
        self.device = device

        self.logstd_min = -20
        self.logstd_max = 2

        self.mean_head = nn.Linear(self.feature_dim, self.action_dim)
        # 0.01 -> almost zeros initially
        ortho_init(self.mean_head, weight_scale=0.01, constant_bias=0.0)

        self.logstd_head = nn.Linear(self.feature_dim, self.action_dim)

        self.to(self.device)
コード例 #10
0
ファイル: agent_lstm.py プロジェクト: zuoxingdong/lagom
 def __init__(self, config, env, device, **kwargs):
     super().__init__(config, env, device, **kwargs)
     
     feature_dim = config['rnn.size']
     self.feature_network = FeatureNet(config, env, device, **kwargs)
     if isinstance(env.action_space, spaces.Discrete):
         self.action_head = CategoricalHead(feature_dim, env.action_space.n, device, **kwargs)
     elif isinstance(env.action_space, spaces.Box):
         self.action_head = DiagGaussianHead(feature_dim, spaces.flatdim(env.action_space), device, config['agent.std0'], **kwargs)
     self.V_head = nn.Linear(feature_dim, 1)
     ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)
     self.V_head = self.V_head.to(device)  # reproducible between CPU/GPU, ortho_init behaves differently
     
     self.total_timestep = 0
     self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
     if config['agent.use_lr_scheduler']:
         self.lr_scheduler = linear_lr_scheduler(self.optimizer, config['train.timestep'], min_lr=1e-8)
         
     self.state = None
コード例 #11
0
ファイル: agent.py プロジェクト: zuoxingdong/lagom
    def __init__(self, config, env, device, **kwargs):
        super().__init__(**kwargs)
        self.config = config
        self.env = env
        self.device = device

        self.feature_layers = make_fc(spaces.flatdim(env.observation_space),
                                      config['nn.sizes'])
        for layer in self.feature_layers:
            ortho_init(layer, nonlinearity='tanh', constant_bias=0.0)

        feature_dim = config['nn.sizes'][-1]
        if isinstance(env.action_space, spaces.Discrete):
            self.action_head = CategoricalHead(feature_dim, env.action_space.n,
                                               device, **kwargs)
        elif isinstance(env.action_space, spaces.Box):
            self.action_head = DiagGaussianHead(
                feature_dim, spaces.flatdim(env.action_space), device,
                config['agent.std0'], **kwargs)

        self.to(self.device)
コード例 #12
0
    def init_params(self, config):
        ortho_init(self.fc, nonlinearity='relu', constant_bias=0.0)

        for layer in self.feature_layers:
            ortho_init(layer, nonlinearity='relu', constant_bias=0.0)

        ortho_init(self.x_head, nonlinearity='sigmoid', constant_bias=0.0)
コード例 #13
0
 def __init__(self, config, device, **kwargs):
     super().__init__(**kwargs)
     self.config = config
     self.device = device
     
     self.encoder = make_fc(784, [400])
     for layer in self.encoder:
         ortho_init(layer, nonlinearity='relu', constant_bias=0.0)
         
     self.mean_head = nn.Linear(400, config['nn.z_dim'])
     ortho_init(self.mean_head, weight_scale=0.01, constant_bias=0.0)
     self.logvar_head = nn.Linear(400, config['nn.z_dim'])
     ortho_init(self.logvar_head, weight_scale=0.01, constant_bias=0.0)
     
     self.decoder = make_fc(config['nn.z_dim'], [400])
     for layer in self.decoder:
         ortho_init(layer, nonlinearity='relu', constant_bias=0.0)
     self.x_head = nn.Linear(400, 784)
     ortho_init(self.x_head, nonlinearity='sigmoid', constant_bias=0.0)
     
     self.to(device)
     self.total_iter = 0
コード例 #14
0
    def __init__(self, in_features, out_features, num_density, **kwargs):
        super().__init__(**kwargs)

        self.in_features = in_features
        self.out_features = out_features
        self.num_density = num_density

        self.pi_head = nn.Linear(in_features, out_features * num_density)
        ortho_init(self.pi_head, weight_scale=0.01, constant_bias=0.0)
        self.mean_head = nn.Linear(in_features, out_features * num_density)
        ortho_init(self.mean_head, weight_scale=0.01, constant_bias=0.0)
        self.logvar_head = nn.Linear(in_features, out_features * num_density)
        ortho_init(self.logvar_head, weight_scale=0.01, constant_bias=0.0)
コード例 #15
0
 def init_params(self, config):
     for layer in self.feature_layers:
         ortho_init(layer, nonlinearity='leaky_relu', constant_bias=0.0)
コード例 #16
0
 def __init__(self, config, device, **kwargs):
     super().__init__(**kwargs)
     self.config = config
     self.device = device
     
     self.encoder = make_cnn(input_channel=1, 
                             channels=[64, 64, 64], 
                             kernels=[4, 4, 4], 
                             strides=[2, 2, 1], 
                             paddings=[0, 0, 0])
     for layer in self.encoder:
         ortho_init(layer, nonlinearity='relu', constant_bias=0.0)
     
     self.mean_head = nn.Linear(256, config['nn.z_dim'])
     ortho_init(self.mean_head, weight_scale=0.01, constant_bias=0.0)
     self.logvar_head = nn.Linear(256, config['nn.z_dim'])
     ortho_init(self.logvar_head, weight_scale=0.01, constant_bias=0.0)
     
     self.decoder_fc = nn.Linear(config['nn.z_dim'], 256)
     ortho_init(self.decoder_fc, nonlinearity='relu', constant_bias=0.0)
     self.decoder = make_transposed_cnn(input_channel=64, 
                                        channels=[64, 64, 64], 
                                        kernels=[4, 4, 4], 
                                        strides=[2, 1, 1], 
                                        paddings=[0, 0, 0], 
                                        output_paddings=[0, 0, 0])
     for layer in self.decoder:
         ortho_init(layer, nonlinearity='relu', constant_bias=0.0)
     self.x_head = nn.Linear(9216, 784)
     ortho_init(self.x_head, nonlinearity='sigmoid', constant_bias=0.0)
     
     self.to(device)
     self.total_iter = 0
コード例 #17
0
 def init_params(self, config):
     for ln_cell in self.rnn.ln_cells:
         ortho_init(ln_cell, weight_scale=1.0, constant_bias=0.0)
コード例 #18
0
ファイル: test_networks.py プロジェクト: vin136/lagom
 def init_params(self, config):
     ortho_init(self.rnn, nonlinearity=None, weight_scale=1.0, constant_bias=0.0)
コード例 #19
0
ファイル: test_networks.py プロジェクト: zuoxingdong/lagom
    def test_ortho_init(self):
        # Linear
        a = nn.Linear(2, 3)
        ortho_init(a, weight_scale=1000., constant_bias=10.)
        assert a.weight.max().item() > 30.
        assert np.allclose(a.bias.detach().numpy(), 10.)
        ortho_init(a, nonlinearity='relu')

        # Conv2d
        a = nn.Conv2d(2, 3, 3)
        ortho_init(a, weight_scale=1000., constant_bias=10.)
        assert a.weight.max().item() > 50.
        assert np.allclose(a.bias.detach().numpy(), 10.)
        ortho_init(a, nonlinearity='relu')

        # LSTM
        a = nn.LSTM(2, 3, 2)
        ortho_init(a, weight_scale=1000., constant_bias=10.)
        assert a.weight_hh_l0.max().item() > 50.
        assert a.weight_hh_l1.max().item() > 50.
        assert a.weight_ih_l0.max().item() > 50.
        assert a.weight_ih_l1.max().item() > 50.
        assert np.allclose(a.bias_hh_l0.detach().numpy(), 10.)
        assert np.allclose(a.bias_hh_l1.detach().numpy(), 10.)
        assert np.allclose(a.bias_ih_l0.detach().numpy(), 10.)
        assert np.allclose(a.bias_ih_l1.detach().numpy(), 10.)

        # LSTMCell
        a = nn.LSTMCell(3, 2)
        ortho_init(a, weight_scale=1000., constant_bias=10.)
        assert a.weight_hh.max().item() > 50.
        assert a.weight_ih.max().item() > 50.
        assert np.allclose(a.bias_hh.detach().numpy(), 10.)
        assert np.allclose(a.bias_ih.detach().numpy(), 10.)
コード例 #20
0
 def init_params(self, config):
     # 0.01->uniformly distributed
     ortho_init(self.action_head,
                nonlinearity=None,
                weight_scale=0.01,
                constant_bias=0.0)