Example #1
# numpy and typing are required by the snippet below; Agent, DQN and
# Transition are project-local classes that the example assumes are
# importable from the surrounding repository.
import numpy as np
from typing import List


class DQNAgent(Agent):
    def __init__(self,
                 state_size,
                 num_actions,
                 batch_size=64,
                 gamma=0.999,
                 epsilon=0.9,
                 epsilon_decay=0.99995,
                 **kwargs):
        super(DQNAgent, self).__init__(state_size, num_actions, **kwargs)

        self.batch_size = batch_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay

        self.net = DQN(state_size, num_actions, **kwargs)

    def get_action(self, state: np.ndarray):
        # epsilon-greedy: explore with probability epsilon while training,
        # otherwise act greedily with respect to the predicted Q-values
        if self.mode == 'train' and np.random.random() < self.epsilon:
            action = np.random.randint(self.num_actions)
        else:
            action = np.argmax(self.get_q_values(state), axis=-1)
        # decay exploration after every action selection
        self.epsilon *= self.epsilon_decay
        return action

    def get_q_values(self, state: np.ndarray) -> np.ndarray:
        # forward pass through the Q-network, returned as a numpy array
        return self.net.predict(state).detach().cpu().numpy()  # shape = (b, m, c)

    def optimize(self):
        # sample a minibatch from the replay buffer; skip the update until
        # the buffer can provide a full batch
        batch: List[Transition] = self.buffer.sample(self.batch_size)
        if batch is None:
            return

        self.net.optimize(batch, self.gamma)

    def save_model(self, model_save_path: str):
        self.net.save_model(model_save_path)
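
With the default hyperparameters, epsilon is multiplied by epsilon_decay on every call to get_action, so the exploration schedule follows directly from the constants above. The snippet below is a self-contained check of that schedule using only the defaults shown (epsilon=0.9, epsilon_decay=0.99995); the 0.05 threshold is just an illustrative target, not a value taken from the project.

import math

epsilon, decay, target = 0.9, 0.99995, 0.05

# smallest n with epsilon * decay**n < target
steps = math.ceil(math.log(target / epsilon) / math.log(decay))
print(steps)  # roughly 58,000 calls to get_action before epsilon drops below 0.05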
Example #2
# numpy and typing are again required by the snippet; CropAgent, DQN,
# Transition and computeSaliencyUsingSarfa are project-local names that the
# example assumes are importable from the surrounding repository.
import numpy as np
from typing import List


class DQNCropAgent(CropAgent):
    def __init__(self,
                 state_size,
                 _num_actions,
                 batch_size=64,
                 gamma=0.999,
                 epsilon=0.9,
                 epsilon_decay=0.99995,
                 **kwargs):
        # the _num_actions argument is ignored: the size of the discrete
        # action space is derived from the class-level water/nitrogen/
        # phosphorus value grids
        num_actions = len(self.WATER_VALUES) * len(self.NITROGEN_VALUES) \
                      * len(self.PHOSPHORUS_VALUES)
        super(DQNCropAgent, self).__init__(state_size, num_actions, **kwargs)

        self.batch_size = batch_size
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay

        self.net = DQN(state_size, self.num_actions, **kwargs)

    def get_action(self, state: np.ndarray):
        # epsilon-greedy over the flat action indices
        if self.mode == 'train' and np.random.random() < self.epsilon:
            action_idx = np.random.randint(self.num_actions)
        else:
            action_idx = self.get_q_values(state).argmax(axis=-1)

        # decay exploration after every action selection
        self.epsilon *= self.epsilon_decay

        # convert the flat action index back to the actual action values
        action = self.idx_to_action(action_idx)

        return action

    def get_q_values(self, state: np.ndarray) -> np.ndarray:
        return self.net.predict(state).detach().cpu().numpy()

    def get_saliency(self, state: np.ndarray,
                     q_values: np.ndarray) -> np.ndarray:
        # saliency is only defined for a single, un-batched state vector
        assert state.size == self.state_size, "saliency cannot be computed during training"

        self.update_state_value_range(state)

        saliency = np.zeros_like(state)
        action: int = q_values.argmax()
        # SARFA works on per-action Q-value dicts; values are scaled down by
        # a factor of 100 before being passed on
        q_values_dict = {i: q / 100 for i, q in enumerate(q_values.squeeze())}

        # perturb each state feature repeatedly and average the resulting
        # SARFA saliency scores over SALIENCY_TRIALS runs
        for _ in range(self.SALIENCY_TRIALS):
            for i in range(self.state_size):
                perturbed_state = self.perturb(state, i)
                perturbed_q_values = self.get_q_values(perturbed_state)
                perturbed_q_values_dict = {
                    j: q / 100
                    for j, q in enumerate(perturbed_q_values.squeeze())
                }

                saliency[i] += computeSaliencyUsingSarfa(
                    action, q_values_dict,
                    perturbed_q_values_dict)[0] / self.SALIENCY_TRIALS

        return saliency

    def optimize(self):
        # sample a minibatch from the replay buffer; skip the update until
        # the buffer can provide a full batch
        batch: List[Transition] = self.buffer.sample(self.batch_size)
        if batch is None:
            return

        self.net.optimize(batch, self.gamma)

    def save_model(self, model_save_path: str):
        self.net.save_model(model_save_path)
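
Since num_actions is the product of the three value grids, idx_to_action presumably decodes a flat index back into a (water, nitrogen, phosphorus) triple. The sketch below illustrates that decoding under that assumption; the concrete grids are placeholders, not the project's WATER_VALUES / NITROGEN_VALUES / PHOSPHORUS_VALUES.

import itertools

# placeholder grids, not the project's actual values
WATER_VALUES = [0, 10, 20]
NITROGEN_VALUES = [0, 5]
PHOSPHORUS_VALUES = [0, 5]

# one flat index per combination, matching
# num_actions = len(WATER_VALUES) * len(NITROGEN_VALUES) * len(PHOSPHORUS_VALUES)
ACTIONS = list(itertools.product(WATER_VALUES, NITROGEN_VALUES, PHOSPHORUS_VALUES))

def idx_to_action(idx):
    """Decode a flat action index into (water, nitrogen, phosphorus) values."""
    return ACTIONS[idx]

print(len(ACTIONS))      # 12 == num_actions for these placeholder grids
print(idx_to_action(7))  # (10, 5, 5)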