Esempio n. 1
0
    def _make_agent_modules(self):
        """Create the ``policy``/``v_module`` and ``policy_lo``/``v_module_lo`` pairs.

        Every module is constructed from ``self._dummy_env`` with ``256`` as
        the second positional argument (presumably the hidden size -- confirm
        against the constructors). Only the first policy receives explicit
        ``a_range`` and ``action_size`` overrides; both value modules are
        created with ``use_antigoal=False``.
        """
        env = self._dummy_env
        width = 256

        # The first policy's action size is taken from the env's goal size.
        policy_overrides = {"a_range": 5, "action_size": env.goal_size}
        self.policy = StochasticPolicy(env, width, **policy_overrides)
        self.v_module = Value(env, width, use_antigoal=False)

        # The "_lo" pair relies on the constructors' defaults.
        self.policy_lo = StochasticPolicy(env, width)
        self.v_module_lo = Value(env, width, use_antigoal=False)
Esempio n. 2
0
 def _make_agent_modules(self):
     """Create the ``vae``, ``policy`` and ``v_module`` attributes.

     The VQ-VAE discriminator is built from the env's state size plus
     ``self.vae_args`` and then loads ``self.vae_checkpoint_path``. Its
     ``code_size`` is passed as ``goal_size`` to both the policy and the
     value module, which otherwise share the same constructor arguments.
     """
     env = self._dummy_env

     # Assign before loading so the attribute exists even if loading fails.
     self.vae = VQVAEDiscriminator(state_size=env.state_size, **self.vae_args)
     self.vae.load_checkpoint(self.vae_checkpoint_path)

     # Arguments common to the policy and the value module.
     shared = {
         "env": env,
         "hidden_size": self.hidden_size,
         "num_layers": self.num_layers,
         "goal_size": self.vae.code_size,
         "normalize_inputs": self.normalize_inputs,
     }
     self.policy = StochasticPolicy(**shared)
     self.v_module = Value(use_antigoal=self.use_antigoal, **shared)
Esempio n. 3
0
 def _make_agent_modules(self):
     """Create the skill embedding, then the ``policy`` and ``v_module``.

     ``self._make_skill_embedding()`` runs first. The policy and the value
     module are then built from one shared set of keyword arguments, with
     ``goal_size`` set to ``self.skill_n``; the value module additionally
     gets ``use_antigoal=False``.
     """
     self._make_skill_embedding()

     # Arguments common to the policy and the value module.
     shared = {
         "env": self._dummy_env,
         "hidden_size": self.hidden_size,
         "num_layers": self.num_layers,
         "goal_size": self.skill_n,
         "normalize_inputs": self.normalize_inputs,
     }
     self.policy = StochasticPolicy(**shared)
     self.v_module = Value(use_antigoal=False, **shared)
Esempio n. 4
0
    def _make_agent_modules(self):
        """Create the policy/critic modules, their targets, and the ``_lo`` pair.

        ``policy``/``p_target`` and ``q_module``/``q_target`` are all built
        with identical arguments; each target then loads the state dict of
        its counterpart. ``policy_lo`` and ``v_module_lo`` are built
        separately with ``goal_size=2``.
        """
        env = self._dummy_env

        # Arguments shared by the four Policy/Critic instances.
        base_args = (env, 128)
        base_kwargs = {"a_range": 5, "action_size": 2}

        self.policy = Policy(*base_args, **base_kwargs)
        self.p_target = Policy(*base_args, **base_kwargs)
        # Start the target from the same state as the module it mirrors.
        self.p_target.load_state_dict(self.policy.state_dict())

        self.q_module = Critic(*base_args, **base_kwargs)
        self.q_target = Critic(*base_args, **base_kwargs)
        self.q_target.load_state_dict(self.q_module.state_dict())

        # The "_lo" pair uses a different second argument and a fixed goal size.
        lo_args = (env, 256)
        self.policy_lo = StochasticPolicy(*lo_args, goal_size=2)
        self.v_module_lo = Value(*lo_args, goal_size=2, use_antigoal=False)
Esempio n. 5
0
 def _make_agent_modules(self):
     """Create ``policy`` and ``v_module`` from ``self._dummy_env``.

     Both receive ``128`` as the second positional argument; the value
     module is created with ``use_antigoal=False``.
     """
     env, width = self._dummy_env, 128
     self.policy = StochasticPolicy(env, width)
     self.v_module = Value(env, width, use_antigoal=False)
Esempio n. 6
0
 def _make_agent_modules(self):
     """Create ``policy`` and ``v_module`` from ``self._dummy_env``.

     Both receive ``256`` as the second positional argument; the value
     module's ``use_antigoal`` flag is forwarded from ``self.use_antigoal``.
     """
     env, width = self._dummy_env, 256
     self.policy = StochasticPolicy(env, width)
     self.v_module = Value(env, width, use_antigoal=self.use_antigoal)