Ejemplo n.º 1
0
 def __init__(
     self,
     action_selector: ActionSelector,
     max_exploratory_steps: int = 1000,
 ):
     """Wrap an existing action selector and record an exploratory-step limit.

     Args:
         action_selector: Selector to wrap. Its ``use_cuda`` value is passed
             to ``ActionSelector.__init__`` and its ``exploration`` value is
             copied onto this instance.
         max_exploratory_steps: Stored as ``self.max_exploratory_steps``.
     """
     ActionSelector.__init__(self, action_selector.use_cuda)
     self.max_exploratory_steps = max_exploratory_steps
     # Keep a handle on the wrapped selector and mirror its exploration flag.
     self.action_selector = action_selector
     self.exploration = action_selector.exploration
Ejemplo n.º 2
0
    def __init__(
        self,
        action_selector: ActionSelector,
        action_space: list,
        mu: float = 0.0,
        theta: float = 0.15,
        max_sigma: float = 0.3,
        min_sigma: float = 0.3,
        decay_period: int = 100000,
    ):
        """Wrap an action selector and store noise parameters.

        Args:
            action_selector: Selector to wrap. Its ``use_cuda`` value is passed
                to ``ActionSelector.__init__``; its ``action_min``,
                ``action_max`` and ``action_dim`` are copied onto this instance.
            action_space: Accepted but not used by this initializer.
            mu: Stored as ``self.mu``.
            theta: Stored as ``self.theta``.
            max_sigma: Stored as ``self.max_sigma`` and used as the initial
                ``self.sigma``.
            min_sigma: Stored as ``self.min_sigma``.
            decay_period: Stored as ``self.decay_period``.
        """
        ActionSelector.__init__(self, action_selector.use_cuda)

        # Wrapped selector and the action bounds/dimension copied from it.
        self.action_selector = action_selector
        self.action_min = action_selector.action_min
        self.action_max = action_selector.action_max
        self.action_dim = action_selector.action_dim

        # Noise parameters; sigma starts at its maximum value.
        # NOTE(review): mu/theta/sigma look like Ornstein-Uhlenbeck
        # parameters - confirm against the rest of the class.
        self.mu, self.theta = mu, theta
        self.sigma = self.max_sigma = max_sigma
        self.min_sigma = min_sigma
        self.decay_period = decay_period

        self.exploration = True

        # _reset() is defined elsewhere in the class; it runs last so every
        # attribute above is already in place.
        self._reset()
Ejemplo n.º 3
0
    def __init__(
        self,
        action_selector: ActionSelector,
        mu: float,
        sigma: float,
    ):
        """Wrap an action selector and store ``mu``/``sigma`` noise parameters.

        Args:
            action_selector: Selector to wrap. Its ``use_cuda`` value is passed
                to ``ActionSelector.__init__``; its ``action_min``,
                ``action_max`` and ``action_dim`` are copied onto this instance.
            mu: Stored as ``self.mu``.
            sigma: Stored as ``self.sigma``.
        """
        ActionSelector.__init__(self, action_selector.use_cuda)

        # Noise parameters and the exploration flag (enabled from the start).
        self.mu, self.sigma = mu, sigma
        self.exploration = True

        # Wrapped selector and the action bounds/dimension copied from it.
        self.action_selector = action_selector
        self.action_min = action_selector.action_min
        self.action_max = action_selector.action_max
        self.action_dim = action_selector.action_dim
Ejemplo n.º 4
0
 def __init__(
     self,
     action_selector: ActionSelector,
     action_space: spaces.Discrete,
     hyper_params: DictConfig,
 ):
     """Wrap an action selector and set up epsilon values from the config.

     Args:
         action_selector: Selector to wrap. Its ``use_cuda`` value is passed
             to ``ActionSelector.__init__``.
         action_space: Stored as ``self.action_space``.
         hyper_params: Config providing ``eps``, ``eps_final`` and
             ``max_exploration_frame``.
     """
     ActionSelector.__init__(self, action_selector.use_cuda)
     self.action_selector = action_selector
     self.action_space = action_space

     eps_start = hyper_params.eps
     eps_end = hyper_params.eps_final
     self.eps = eps_start
     self.eps_final = eps_end
     # Gap between the initial and final epsilon, divided by
     # max_exploration_frame.
     decay_span = eps_start - eps_end
     self.eps_decay = decay_span / hyper_params.max_exploration_frame
Ejemplo n.º 5
0
    def test(
        self,
        policy: BaseModel,
        action_selector: ActionSelector,
        episode_i: int,
        update_step: int,
    ) -> float:
        """Test policy without random exploration a number of times.

        Runs ``self.experiment_info.test_num`` episodes with
        ``action_selector.exploration`` set to ``False`` and prints the reward
        of each episode plus the mean over all of them.

        Args:
            policy: Model passed to ``action_selector``; ``policy.eval()`` is
                called before the episodes start.
            action_selector: Callable as ``action_selector(policy, state)``.
                Its ``exploration`` attribute is set to ``False`` for the
                duration of the test and set to ``True`` afterwards, including
                when an episode raises.
            episode_i: Episode number, used only in the printed messages.
            update_step: Update step, used only in the printed summary.

        Returns:
            The mean of the per-episode reward sums, from ``np.mean``.
        """
        print("====TEST START====")
        policy.eval()
        action_selector.exploration = False
        episode_rewards = []
        try:
            for test_i in range(self.experiment_info.test_num):
                state = self.env.reset()
                episode_reward = 0
                done = False
                while not done:
                    # NOTE(review): rendering during test is gated by the
                    # `render_train` flag - confirm this is intended.
                    if self.experiment_info.render_train:
                        self.env.render()
                    action = action_selector(policy, state)
                    state, action, reward, next_state, done = self.step(
                        state, action)
                    episode_reward = episode_reward + reward
                    state = next_state

                print(
                    f"episode num: {episode_i} | test: {test_i} episode reward: {episode_reward}"
                )
                episode_rewards.append(episode_reward)
        finally:
            # Always re-enable exploration so a failure inside an episode does
            # not leave the caller's selector in non-exploring mode.
            action_selector.exploration = True

        mean_rewards = np.mean(episode_rewards)
        print(f"EPISODE NUM: {episode_i} | UPDATE STEP: {update_step} |"
              f"MEAN REWARD: {mean_rewards}")
        print("====TEST END====")

        return mean_rewards
Ejemplo n.º 6
0
 def __init__(self, device: str):
     """Initialize by delegating to ``ActionSelector.__init__``.

     Args:
         device: Passed unchanged as the only argument to the
             ``ActionSelector`` initializer.
     """
     ActionSelector.__init__(self, device)
Ejemplo n.º 7
0
 def __init__(self, action_dim: int, action_range: list, use_cuda: bool):
     """Store the action dimension and the action bounds as NumPy arrays.

     Args:
         action_dim: Stored as ``self.action_dim``.
         action_range: Indexable; element 0 becomes ``self.action_min`` and
             element 1 becomes ``self.action_max``, each wrapped in
             ``np.array``.
         use_cuda: Passed to ``ActionSelector.__init__``.
     """
     ActionSelector.__init__(self, use_cuda)
     lower_bound, upper_bound = action_range[0], action_range[1]
     self.action_dim = action_dim
     self.action_min = np.array(lower_bound)
     self.action_max = np.array(upper_bound)
Ejemplo n.º 8
0
 def __init__(self, use_cuda: bool):
     """Initialize by delegating to ``ActionSelector.__init__``.

     Args:
         use_cuda: Passed unchanged as the only argument to the
             ``ActionSelector`` initializer.
     """
     ActionSelector.__init__(self, use_cuda)