Ejemplo n.º 1
0
 def do_rollouts(self, seed):
     """Gather rollout paths through ``gather_paths_parallel``.

     The call forwards, in order: the environment id, the last element
     of ``self.sol_state``, the current action sequence, the filter
     coefficients, ``seed``, and the ``paths_per_cpu`` / ``num_cpu``
     settings stored on this object.

     Args:
         seed: Seed value handed unchanged to ``gather_paths_parallel``.

     Returns:
         Whatever ``gather_paths_parallel`` returns for these arguments.
     """
     # NOTE(review): sol_state[-1] is presumably the most recent state
     # and serves as the rollout start -- confirm against the helper.
     return gather_paths_parallel(
         self.env.env_id,
         self.sol_state[-1],
         self.act_sequence,
         self.filter_coefs,
         seed,
         self.paths_per_cpu,
         self.num_cpu,
     )
Ejemplo n.º 2
0
 def do_rollouts(self, seed, goal=None):
     """Gather rollout paths through ``gather_paths_parallel``.

     The call forwards, in order: the environment name, the last element
     of ``self.sol_state``, the current action sequence, the filter
     coefficients, ``seed``, ``goal``, the reward type, the reference,
     the environment's ``alpha``, and the ``paths_per_cpu`` / ``num_cpu``
     settings stored on this object.

     Args:
         seed: Seed value handed unchanged to ``gather_paths_parallel``.
         goal: Optional goal forwarded unchanged; defaults to ``None``.

     Returns:
         Whatever ``gather_paths_parallel`` returns for these arguments.
     """
     # NOTE(review): sol_state[-1] is presumably the most recent state
     # and serves as the rollout start -- confirm against the helper.
     return gather_paths_parallel(
         self.env.env_name,
         self.sol_state[-1],
         self.act_sequence,
         self.filter_coefs,
         seed,
         goal,
         self.reward_type,
         self.reference,
         self.env.alpha,
         self.paths_per_cpu,
         self.num_cpu,
     )
Ejemplo n.º 3
0
    def get_action(self, env_state):
        """Return a score-weighted blend of the rollouts' first actions.

        Rollouts are gathered from ``env_state`` via
        ``gather_paths_parallel``, scored with ``self.score_trajectory``,
        and each rollout's first action is weighted by
        ``exp(kappa * (score - max_score))``. The weighted sum is divided
        by the total weight (plus a small constant) to give the action.

        Args:
            env_state: State passed to ``gather_paths_parallel`` in place
                of the stored solution state.

        Returns:
            The weighted average of ``path["actions"][0]`` over all
            gathered paths.
        """
        # The seed is offset by the counter; this method reads self.ctr
        # but does not modify it.
        rollout_seed = self.seed + self.ctr * 1000
        rollouts = gather_paths_parallel(
            self.env.env_id,
            env_state,
            self.act_sequence,
            self.filter_coefs,
            rollout_seed,
            self.paths_per_cpu,
            self.num_cpu,
        )

        scores = self.score_trajectory(rollouts)
        # Shifting by the best score makes the top rollout's weight
        # exactly exp(0) == 1, so the weight sum never vanishes.
        weights = np.exp(self.kappa * (scores - np.max(scores)))

        weighted_first_actions = [
            rollout["actions"][0] * weights[idx]
            for idx, rollout in enumerate(rollouts)
        ]
        blended = np.sum(weighted_first_actions, axis=0)
        # Small epsilon in the denominator guards the division.
        return blended / (np.sum(weights) + 1e-6)