def make_optimizer(self, config, **kwargs):
    self.optimizer = optim.Adam(self.parameters(), lr=config['algo.lr_V'])
    if config['algo.use_lr_scheduler']:
        if 'train.iter' in config:  # iteration-based training loop
            self.lr_scheduler = linear_lr_scheduler(self.optimizer, config['train.iter'], 'iteration-based')
        elif 'train.timestep' in config:  # timestep-based training loop
            self.lr_scheduler = linear_lr_scheduler(self.optimizer, config['train.timestep'] + 1, 'timestep-based')
    else:
        self.lr_scheduler = None
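# For reference, a minimal sketch of what `linear_lr_scheduler` could look like,
# built on torch.optim.lr_scheduler.LambdaLR. It follows the
# (optimizer, N, min_lr) signature exercised by the test further below; the
# mode-string form used above ('iteration-based'/'timestep-based') appears to be
# a different revision of the same helper. The body is an assumption, not the
# library's actual implementation.
import torch.optim as optim

def linear_lr_scheduler(optimizer, N, min_lr):
    # Scale lr linearly from its initial value down to min_lr over N steps,
    # then hold it at min_lr (assumes min_lr <= initial lr).
    initial_lr = optimizer.defaults['lr']
    f = lambda step: max(min_lr / initial_lr, 1.0 - step / N)
    return optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=f)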
def __init__(self, config, env, device, **kwargs):
    super().__init__(config, env, device, **kwargs)
    feature_dim = config['nn.sizes'][-1]
    self.feature_network = MLP(config, env, device, **kwargs)
    if isinstance(env.action_space, Discrete):
        self.action_head = CategoricalHead(feature_dim, env.action_space.n, device, **kwargs)
    elif isinstance(env.action_space, Box):
        self.action_head = DiagGaussianHead(feature_dim, flatdim(env.action_space), device,
                                            config['agent.std0'], config['agent.std_style'],
                                            config['agent.std_range'], config['agent.beta'], **kwargs)
    self.V_head = nn.Linear(feature_dim, 1).to(device)
    ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)
    self.total_timestep = 0

    self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
    if config['agent.use_lr_scheduler']:
        self.lr_scheduler = linear_lr_scheduler(self.optimizer, config['train.timestep'], min_lr=1e-8)
def __init__(self, config, env, device, **kwargs):
    super().__init__(config, env, device, **kwargs)
    feature_dim = config['nn.sizes'][-1]
    self.feature_network = MLP(config, env, device, **kwargs)
    if isinstance(env.action_space, Discrete):
        self.action_head = CategoricalHead(feature_dim, env.action_space.n, device, **kwargs)
    elif isinstance(env.action_space, Box):
        self.action_head = DiagGaussianHead(feature_dim, flatdim(env.action_space), device,
                                            config['agent.std0'], **kwargs)
    self.V_head = nn.Linear(feature_dim, 1)
    ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)
    # initialize on CPU first, then move: ortho_init behaves differently
    # between CPU/GPU, so this order keeps results reproducible across devices
    self.V_head = self.V_head.to(device)
    # buffer instead of a plain int, so the counter is saved/loaded with state_dict
    self.register_buffer('total_timestep', torch.tensor(0))

    self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
    if config['agent.use_lr_scheduler']:
        self.lr_scheduler = linear_lr_scheduler(self.optimizer, config['train.timestep'], min_lr=1e-8)

    self.gamma = config['agent.gamma']
    self.clip_rho = config['agent.clip_rho']
    self.clip_pg_rho = config['agent.clip_pg_rho']
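# Why register_buffer for the counter: buffers are included in state_dict() and
# move with .to(device), so total_timestep survives checkpointing, unlike the
# plain-int version used in the other constructors. A self-contained illustration:
import torch
import torch.nn as nn

class Counter(nn.Module):
    def __init__(self):
        super().__init__()
        self.register_buffer('total_timestep', torch.tensor(0))

assert 'total_timestep' in Counter().state_dict()  # saved and restored with the model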
def __init__(self, config, env, device, **kwargs):
    super().__init__(config, env, device, **kwargs)
    self.policy = Actor(config, env, device, **kwargs)
    self.value = Critic(config, env, device, **kwargs)
    self.total_timestep = 0

    self.policy_optimizer = optim.Adam(self.policy.parameters(), lr=config['agent.policy_lr'])
    self.value_optimizer = optim.Adam(self.value.parameters(), lr=config['agent.value_lr'])
    if config['agent.use_lr_scheduler']:
        self.policy_lr_scheduler = linear_lr_scheduler(self.policy_optimizer, config['train.timestep'], min_lr=1e-8)
def test_linear_lr_scheduler(method, N, min_lr, initial_lr):
    net = nn.Linear(30, 16)
    if method == 'Adam':
        optimizer = optim.Adam(net.parameters(), lr=initial_lr)
    elif method == 'RMSprop':
        optimizer = optim.RMSprop(net.parameters(), lr=initial_lr)
    elif method == 'Adamax':
        optimizer = optim.Adamax(net.parameters(), lr=initial_lr)
    lr_scheduler = linear_lr_scheduler(optimizer, N, min_lr)
    assert lr_scheduler.base_lrs[0] == initial_lr
    for i in range(200):
        lr_scheduler.step()
        assert lr_scheduler.get_lr()[0] >= min_lr  # never decays below the floor
    assert lr_scheduler.get_lr()[0] == min_lr  # reaches exactly min_lr once N steps have passed
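# The signature above matches pytest parametrization; a direct invocation for a
# quick smoke check (the concrete values are illustrative assumptions, not the
# suite's actual parameter grid):
test_linear_lr_scheduler('Adam', N=100, min_lr=1e-6, initial_lr=1e-3)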
def __init__(self, config, env, device, **kwargs):
    super().__init__(config, env, device, **kwargs)
    feature_dim = config['rnn.size']
    self.feature_network = FeatureNet(config, env, device, **kwargs)
    if isinstance(env.action_space, spaces.Discrete):
        self.action_head = CategoricalHead(feature_dim, env.action_space.n, device, **kwargs)
    elif isinstance(env.action_space, spaces.Box):
        self.action_head = DiagGaussianHead(feature_dim, spaces.flatdim(env.action_space), device,
                                            config['agent.std0'], **kwargs)
    self.V_head = nn.Linear(feature_dim, 1)
    ortho_init(self.V_head, weight_scale=1.0, constant_bias=0.0)
    self.V_head = self.V_head.to(device)  # reproducible between CPU/GPU, ortho_init behaves differently
    self.total_timestep = 0

    self.optimizer = optim.Adam(self.parameters(), lr=config['agent.lr'])
    if config['agent.use_lr_scheduler']:
        self.lr_scheduler = linear_lr_scheduler(self.optimizer, config['train.timestep'], min_lr=1e-8)

    self.state = None  # recurrent hidden state for FeatureNet
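# End-to-end sanity check of the decay these agents rely on, using the
# linear_lr_scheduler sketch from the top of this section (values illustrative):
import torch.nn as nn
import torch.optim as optim

net = nn.Linear(4, 2)
optimizer = optim.Adam(net.parameters(), lr=1e-3)
scheduler = linear_lr_scheduler(optimizer, 1000, min_lr=1e-8)
for timestep in range(2000):
    optimizer.step()   # one (dummy) update per environment timestep
    scheduler.step()   # lr shrinks linearly, then holds at the floor
print(optimizer.param_groups[0]['lr'])  # ~1e-8 once the schedule has completed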