Example 1
    def make_optimizer(self, config, **kwargs):
        self.optimizer = optim.Adam(self.parameters(), lr=config['algo.lr_V'])
        if config['algo.use_lr_scheduler']:
            if 'train.iter' in config:
                self.lr_scheduler = linear_lr_scheduler(
                    self.optimizer, config['train.iter'], 'iteration-based')
            elif 'train.timestep' in config:
                self.lr_scheduler = linear_lr_scheduler(
                    self.optimizer, config['train.timestep'] + 1,
                    'timestep-based')
        else:
            self.lr_scheduler = None
Example 2
    def __init__(self, config, env, device, **kwargs):
        super().__init__(config, env, device, **kwargs)

        feature_dim = config['nn.sizes'][-1]
        self.feature_network = MLP(config, env, device, **kwargs)
        if isinstance(env.action_space, Discrete):
            self.action_head = CategoricalHead(feature_dim, env.action_space.n,
                                               device, **kwargs)
        elif isinstance(env.action_space, Box):
            self.action_head = DiagGaussianHead(feature_dim,
                                                flatdim(env.action_space),
                                                device, config['agent.std0'],
                                                config['agent.std_style'],
                                                config['agent.std_range'],
                                                config['agent.beta'], **kwargs)
        self.V_head = nn.Linear(feature_dim, 1).to(device)
        ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)

        self.total_timestep = 0

        self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
        if config['agent.use_lr_scheduler']:
            self.lr_scheduler = linear_lr_scheduler(self.optimizer,
                                                    config['train.timestep'],
                                                    min_lr=1e-8)
Example 3
    def __init__(self, config, env, device, **kwargs):
        super().__init__(config, env, device, **kwargs)

        feature_dim = config['nn.sizes'][-1]
        self.feature_network = MLP(config, env, device, **kwargs)
        if isinstance(env.action_space, Discrete):
            self.action_head = CategoricalHead(feature_dim, env.action_space.n,
                                               device, **kwargs)
        elif isinstance(env.action_space, Box):
            self.action_head = DiagGaussianHead(feature_dim,
                                                flatdim(env.action_space),
                                                device, config['agent.std0'],
                                                **kwargs)
        self.V_head = nn.Linear(feature_dim, 1)
        ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)
        self.V_head = self.V_head.to(device)  # reproducible between CPU/GPU, ortho_init behaves differently

        # buffer rather than a plain int, so the counter is saved in state_dict
        self.register_buffer('total_timestep', torch.tensor(0))

        self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
        if config['agent.use_lr_scheduler']:
            self.lr_scheduler = linear_lr_scheduler(self.optimizer,
                                                    config['train.timestep'],
                                                    min_lr=1e-8)
        self.gamma = config['agent.gamma']
        self.clip_rho = config['agent.clip_rho']
        self.clip_pg_rho = config['agent.clip_pg_rho']
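
The difference from Example 2 is worth noting: total_timestep is registered as a buffer rather than kept as a plain Python int, so the counter is saved and restored together with the module's state_dict. A minimal standalone sketch (not lagom code) of the effect:

import torch
import torch.nn as nn

class Counter(nn.Module):
    def __init__(self):
        super().__init__()
        # a buffer is part of state_dict but not a learnable parameter
        self.register_buffer('total_timestep', torch.tensor(0))

counter = Counter()
counter.total_timestep += 10
print(counter.state_dict())  # OrderedDict([('total_timestep', tensor(10))])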
Example 4
    def __init__(self, config, env, device, **kwargs):
        super().__init__(config, env, device, **kwargs)

        self.policy = Actor(config, env, device, **kwargs)
        self.value = Critic(config, env, device, **kwargs)

        self.total_timestep = 0
        self.policy_optimizer = optim.Adam(self.policy.parameters(),
                                           lr=config['agent.policy_lr'])
        self.value_optimizer = optim.Adam(self.value.parameters(),
                                          lr=config['agent.value_lr'])
        if config['agent.use_lr_scheduler']:
            self.policy_lr_scheduler = linear_lr_scheduler(
                self.policy_optimizer, config['train.timestep'], min_lr=1e-8)
Example 5
def test_linear_lr_scheduler(method, N, min_lr, initial_lr):
    # parametrized over optimizer type, horizon N, and LR bounds
    net = nn.Linear(30, 16)
    if method == 'Adam':
        optimizer = optim.Adam(net.parameters(), lr=initial_lr)
    elif method == 'RMSprop':
        optimizer = optim.RMSprop(net.parameters(), lr=initial_lr)
    elif method == 'Adamax':
        optimizer = optim.Adamax(net.parameters(), lr=initial_lr)
    lr_scheduler = linear_lr_scheduler(optimizer, N, min_lr)
    assert lr_scheduler.base_lrs[0] == initial_lr

    for _ in range(200):  # step past N so the LR bottoms out
        lr_scheduler.step()
        assert lr_scheduler.get_lr()[0] >= min_lr  # never decays below min_lr
    assert lr_scheduler.get_lr()[0] == min_lr
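
All of these examples call linear_lr_scheduler(optimizer, N, min_lr), and the test above pins down its contract: base_lrs keeps the optimizer's initial LR, and the LR decays monotonically until it bottoms out at min_lr. A plausible minimal implementation consistent with that contract (an assumption; lagom's actual code may differ) is a LambdaLR with a clipped linear schedule:

import torch.optim as optim

def linear_lr_scheduler(optimizer, N, min_lr):
    # linearly anneal from the optimizer's initial LR down to min_lr over N steps
    initial_lr = optimizer.defaults['lr']
    f = lambda n: max(min_lr / initial_lr, 1 - n / N)
    return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)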
Example 6
    def __init__(self, config, env, device, **kwargs):
        super().__init__(config, env, device, **kwargs)

        feature_dim = config['rnn.size']
        self.feature_network = FeatureNet(config, env, device, **kwargs)
        if isinstance(env.action_space, spaces.Discrete):
            self.action_head = CategoricalHead(feature_dim, env.action_space.n, device, **kwargs)
        elif isinstance(env.action_space, spaces.Box):
            self.action_head = DiagGaussianHead(feature_dim, spaces.flatdim(env.action_space), device, config['agent.std0'], **kwargs)
        self.V_head = nn.Linear(feature_dim, 1)
        ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)
        self.V_head = self.V_head.to(device)  # reproducible between CPU/GPU, ortho_init behaves differently

        self.total_timestep = 0
        self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
        if config['agent.use_lr_scheduler']:
            self.lr_scheduler = linear_lr_scheduler(self.optimizer, config['train.timestep'], min_lr=1e-8)

        self.state = None
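
Finally, a hypothetical usage sketch (names illustrative, built on the linear_lr_scheduler sketch above, not lagom API): the scheduler created in these constructors is stepped once per training iteration or timestep, annealing the LR toward min_lr:

import torch.nn as nn
import torch.optim as optim

net = nn.Linear(4, 2)
optimizer = optim.Adam(net.parameters(), lr=1e-3)
lr_scheduler = linear_lr_scheduler(optimizer, 100, min_lr=1e-8)
for _ in range(100):
    optimizer.step()     # parameter update would happen here
    lr_scheduler.step()  # one scheduler step per iteration/timestep
print(optimizer.param_groups[0]['lr'])  # ~1e-8: reaches min_lr after N steps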