def __init__(self, hparams):
    """Bootstrapped Q-critic.

    Builds the online Q-network, a frozen-at-init target copy, the TD
    regression loss, and the optimizer, all configured from ``hparams``.

    Args:
        hparams: dict with keys 'ob_dim', 'ac_dim', 'discrete',
            'critic_size', 'critic_n_layers', 'learning_rate',
            'target_update_rate', and 'gamma'.
    """
    super().__init__()

    # Unpack hyperparameters.
    self.ob_dim = hparams['ob_dim']
    self.ac_dim = hparams['ac_dim']
    self.discrete = hparams['discrete']
    self.size = hparams['critic_size']
    self.n_layers = hparams['critic_n_layers']
    self.learning_rate = hparams['learning_rate']

    # Critic parameters.
    self.target_update_rate = hparams['target_update_rate']
    self.gamma = hparams['gamma']

    # Online Q-network: maps concatenated (observation, action) to a
    # single scalar Q-value.
    self.critic_network = ptu.build_mlp(
        self.ob_dim + self.ac_dim,
        1,
        n_layers=self.n_layers,
        size=self.size,
    )
    self.critic_network.to(ptu.device)

    # Target network starts as an exact copy of the online network;
    # presumably updated elsewhere via target_update_rate — not visible here.
    self.target_network = copy.deepcopy(self.critic_network)

    # Huber loss for TD-error regression (less sensitive to outliers
    # than plain MSE).
    self.loss = nn.SmoothL1Loss()
    self.optimizer = optim.Adam(
        self.critic_network.parameters(),
        self.learning_rate,
    )
def __init__(self,
             ac_dim,
             ob_dim,
             n_layers,
             size,
             discrete=False,
             learning_rate=1e-4,
             entropy_weight=0.,
             training=True,
             nn_baseline=False,
             **kwargs):
    """MLP policy.

    For discrete action spaces, builds a single network producing
    per-action logits. For continuous spaces, builds a Gaussian policy
    with a state-dependent mean network and a state-independent,
    learnable log-std parameter. Optionally builds a state-value
    baseline network.

    Args:
        ac_dim: action-space dimensionality (number of actions if discrete).
        ob_dim: observation-space dimensionality.
        n_layers: number of hidden layers in each MLP.
        size: hidden-layer width of each MLP.
        discrete: whether the action space is discrete.
        learning_rate: Adam learning rate for all optimizers built here.
        entropy_weight: entropy bonus coefficient (only used for actor critic).
        training: stored flag; NOTE(review): shadows nn.Module's
            train()/eval()-managed ``training`` attribute — confirm intended.
        nn_baseline: whether to build a value baseline network.
        **kwargs: forwarded to the superclass constructor.
    """
    super().__init__(**kwargs)

    # Init vars.
    self.ac_dim = ac_dim
    self.ob_dim = ob_dim
    self.n_layers = n_layers
    self.discrete = discrete
    self.size = size
    self.learning_rate = learning_rate
    self.entropy_weight = entropy_weight  # only used for actor critic
    self.training = training
    self.nn_baseline = nn_baseline

    if self.discrete:
        # Categorical policy: one network emitting per-action logits.
        self.logits_na = ptu.build_mlp(input_size=self.ob_dim,
                                       output_size=self.ac_dim,
                                       n_layers=self.n_layers,
                                       size=self.size)
        self.logits_na.to(ptu.device)
        self.mean_net = None
        self.logstd = None
        self.optimizer = optim.Adam(self.logits_na.parameters(),
                                    self.learning_rate)
    else:
        # Gaussian policy: state-dependent mean, shared learnable log-std.
        self.logits_na = None
        self.mean_net = ptu.build_mlp(input_size=self.ob_dim,
                                      output_size=self.ac_dim,
                                      n_layers=self.n_layers,
                                      size=self.size)
        # logstd is created directly on the target device; no .to() call
        # is needed (and Parameter.to() would return a discarded tensor
        # rather than moving the registered parameter in place).
        self.logstd = nn.Parameter(
            torch.zeros(self.ac_dim, dtype=torch.float32, device=ptu.device))
        self.mean_net.to(ptu.device)
        # Optimize the log-std jointly with the mean network's weights.
        self.optimizer = optim.Adam(
            itertools.chain([self.logstd], self.mean_net.parameters()),
            self.learning_rate)

    if nn_baseline:
        # Optional state-value baseline for variance reduction, with its
        # own optimizer.
        self.baseline = ptu.build_mlp(
            input_size=self.ob_dim,
            output_size=1,
            n_layers=self.n_layers,
            size=self.size,
        )
        self.baseline.to(ptu.device)
        self.baseline_optimizer = optim.Adam(
            self.baseline.parameters(),
            self.learning_rate,
        )
    else:
        self.baseline = None