Example #1
    def setState(self, observation):
        # Remember the current state under its string key.
        self.lstate = GridworldEnv.state2str(observation)

        # Lazily initialize Q-values for states seen for the first time.
        if self.lstate not in self.Q:
            self.Q[self.lstate] = np.zeros(self.nb_action)

        # Epsilon-greedy: explore with probability epsilon, otherwise exploit.
        if random.uniform(0, 1) < self.epsilon:
            self.laction = np.random.randint(self.nb_action)
        else:
            self.laction = np.argmax(self.Q[self.lstate])
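This snippet's action choice is epsilon-greedy: with probability epsilon the agent picks a uniformly random action, otherwise the greedy one under Q. None of the snippets here show how epsilon is set; a common companion, sketched below under assumed bounds, is to decay it over episodes so the agent explores less as Q converges:

    def decayed_epsilon(episode, eps_start=1.0, eps_min=0.05, decay=0.995):
        """Exponentially anneal exploration from eps_start toward eps_min."""
        return max(eps_min, eps_start * decay ** episode)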
Example #2
    def act(self, observation, reward, done):
        obs = GridworldEnv.state2str(observation)

        # Lazily initialize Q-values for states seen for the first time.
        if obs not in self.Q:
            self.Q[obs] = np.zeros(self.nb_action)

        # Epsilon-greedy: explore with probability epsilon, otherwise exploit.
        if random.uniform(0, 1) < self.epsilon:
            self.laction = np.random.randint(self.nb_action)
        else:
            self.laction = np.argmax(self.Q[obs])

        # Update the Q-table with the reward observed for the last transition.
        self._update_Qvalue(reward, obs, done)

        self.lstate = obs
        return self.laction
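The _update_Qvalue helper called above is not shown in any of these examples. For reference, a tabular Q-learning step typically looks like the sketch below; the function name, alpha (learning rate), and gamma (discount factor) are illustrative choices, and prev_state/prev_action stand for the transition being scored:

    import numpy as np

    def q_learning_update(Q, prev_state, prev_action, reward, new_state, done,
                          alpha=0.1, gamma=0.99):
        """One tabular step: Q(s,a) += alpha * (target - Q(s,a))."""
        # Bootstrap from the greedy value of the new state unless the
        # episode ended there.
        target = reward if done else reward + gamma * np.max(Q[new_state])
        Q[prev_state][prev_action] += alpha * (target - Q[prev_state][prev_action])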
Example #3
    def _total_reward(self):
        """Sum of the expected rewards over every state."""
        return sum(self.value[state] for state in self.mdp.keys())

    def act(self, observation, reward, done):
        obs = GridworldEnv.state2str(observation)

        # Lazily initialize Q-values for states seen for the first time.
        if obs not in self.Q:
            self.Q[obs] = np.zeros(self.nb_action)

        # Epsilon-greedy: explore with probability epsilon, otherwise exploit.
        if random.uniform(0, 1) < self.epsilon:
            self.laction = np.random.randint(self.nb_action)
        else:
            self.laction = np.argmax(self.Q[obs])

        self._update_Qvalue(reward, obs, done)

        self.lstate = obs
        return self.laction
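_total_reward sums a value table that these snippets never show being filled. A table like self.value is typically produced by value iteration; the sketch below is one way to do a single Bellman sweep, assuming gym's P-style layout where mdp[state][action] is a list of (prob, next_state, reward, done) tuples and gamma is a hypothetical discount factor:

    def value_iteration_sweep(mdp, value, gamma=0.99):
        """One synchronous Bellman-optimality backup over all states."""
        new_value = {}
        for state, actions in mdp.items():
            # V(s) = max_a sum_{s'} P(s'|s,a) * (r + gamma * V(s'))
            new_value[state] = max(
                sum(prob * (reward + gamma * value.get(next_state, 0.0) * (not done))
                    for prob, next_state, reward, done in transitions)
                for transitions in actions.values()
            )
        return new_value

Repeating the sweep until the largest per-state change drops below a tolerance converges to the optimal value function.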
Example #4
    def setState(self, observation):
        # Remember the current state and lazily initialize its Q-values.
        self.lstate = GridworldEnv.state2str(observation)

        if self.lstate not in self.Q:
            self.Q[self.lstate] = np.zeros(self.nb_action)
Example #5
    def act(self, observation, reward, done):
        # Look up the precomputed policy's action for the current state.
        obs = GridworldEnv.state2str(observation)
        action = self.policy[obs]
        return action
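All five agents expose the same act(observation, reward, done) interface, so a gym-style driver loop would tie any of them to GridworldEnv. The sketch below is illustrative only: the constructor arguments, the QAgent name, and the episode count are assumptions, not part of the snippets above:

    env = GridworldEnv()          # assumed constructor; real setups pass a map/plan
    agent = QAgent(env)           # hypothetical agent built from the methods above

    for episode in range(1000):
        observation = env.reset()
        reward, done = 0.0, False
        agent.setState(observation)              # seed lstate before the first step
        while True:
            action = agent.act(observation, reward, done)
            if done:                             # one last act() sees the final reward
                break
            observation, reward, done, info = env.step(action)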