Example #1
    def reset(self, uniform=False):
        # initializes an episode and returns the initial state of the agent
        # if uniform is False, the first state is drawn from the P0 distribution,
        # else it is drawn from a uniform distribution over all the states
        if uniform:
            prob = np.ones(self.nb_states) / self.nb_states
            self.current_state = discreteProb(prob)
        else:
            self.current_state = discreteProb(self.P0)

        self.timestep = 0
        self.last_action_achieved = False
        return self.current_state
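
Both reset variants (and the step methods below) rely on a discreteProb helper that is not part of these snippets. The sketch below shows what it is assumed to do, namely draw an index according to a discrete probability vector; only the name discreteProb comes from the examples, the body is an assumption.

    import numpy as np

    def discreteProb(p):
        # draw a single index i with probability p[i]
        # p is assumed to be a 1-D NumPy array of non-negative entries summing to 1
        return int(np.random.choice(len(p), p=p))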
Example #2
    def step(self, u, deviation=0):
        # performs a step forward in the environment
        # if you want to add some noise to the reward, give a value to the deviation param,
        # which is the standard deviation of the zero-mean normal distribution used to draw the noise

        noise = 0  # = deviation * np.random.randn()  # generate noise, see an exercise in mbrl.ipynb

        # r is the reward of the transition, you can add some noise to it
        reward = self.r[self.current_state, u] + noise

        # the state reached when performing action u from state x is sampled
        # according to the discrete distribution self.P[x, u, :]
        observation = discreteProb(self.P[self.current_state, u, :])

        self.timestep += 1

        info = {}  # can be used when debugging
        info["State transition probabilities"] = self.P[self.current_state, u, :]
        info["reward's noise value"] = noise

        self.current_state = observation
        done = self.done()  # checks if the episode is over

        return [observation, reward, done, info]
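
A typical way to combine reset and step is a rollout loop. The sketch below is a hedged usage example, not part of the original code: it assumes an env instance of the class above with an action_space attribute exposing a sample() method as in Example #5.

    # hypothetical usage: roll out one episode and accumulate the return
    state = env.reset()
    total_reward = 0.0
    done = False
    while not done:
        u = env.action_space.sample()             # pick an action (uniformly by default)
        state, reward, done, info = env.step(u)   # step returns [observation, reward, done, info]
        total_reward += reward
    print("episode return:", total_reward)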
Example #3
    def reset(self, uniform=False):
        # initializes an episode
        # if uniform is set to False, the first state is drawn from the P0 distribution,
        # else it is drawn from a uniform distribution over all the states except for walls
        if uniform:
            nb_valid = self.observation_space.size - len(self.observation_space.walls)
            prob = np.ones(self.observation_space.size) / nb_valid
            for state in self.observation_space.walls:
                prob[state] = 0.0
            self.current_state = discreteProb(prob)
        else:
            self.current_state = discreteProb(self.P0)

        self.timestep = 0
        self.last_action_achieved = False
        return self.current_state
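
To see why masking the walls still yields a valid distribution, here is a small numerical check; the state count and wall index are made-up values used only for illustration.

    import numpy as np

    size = 5                                     # hypothetical number of states
    walls = [2]                                  # hypothetical wall state
    prob = np.ones(size) / (size - len(walls))   # uniform mass over the non-wall states
    for s in walls:
        prob[s] = 0.0
    print(prob)        # [0.25 0.25 0.   0.25 0.25]
    print(prob.sum())  # 1.0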
Example #4
    def step(self, u, deviation=0):
        # performs a step forward in the environment
        # if you want to add some noise to the reward, give a value to the deviation param,
        # which is the standard deviation of the zero-mean normal distribution used to draw the noise

        noise = deviation * np.random.randn()  # generate noise, useful for RTDP

        # r is the reward of the transition, you can add some noise to it
        reward = self.r[self.current_state, u] + noise

        # the state reached when performing action u from state x is sampled
        # according to the discrete distribution self.P[x, u, :]
        state = discreteProb(self.P[self.current_state, u, :])

        self.timestep += 1

        info = {
            "State transition probabilities": self.P[self.current_state, u, :],
            "reward's noise value": noise,
        }  # can be used when debugging

        self.current_state = state
        done = self.done()  # checks if the episode is over

        return [state, reward, done, info]
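
The deviation parameter of this variant makes the reward stochastic, which is what RTDP-style experiments need. A hedged usage sketch, assuming an env instance of this class and that action 0 is valid:

    # hypothetical usage: the same transition with and without reward noise
    env.reset()
    _, clean_reward, _, _ = env.step(0)                      # deviation=0, deterministic reward
    env.reset()
    _, noisy_reward, _, info = env.step(0, deviation=0.5)    # reward perturbed by zero-mean noise with std 0.5
    print("noise drawn:", info["reward's noise value"])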
Example #5
    def sample(self, prob_list=None):
        # returns an action drawn according to the prob_list distribution;
        # if the param is not set, the action is drawn from a uniform distribution over all actions
        if prob_list is None:
            prob_list = np.ones(self.size) / self.size

        index = discreteProb(prob_list)
        return self.actions[index]
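
A quick usage sketch for sample; the action_space object, its four actions, and the probabilities below are assumptions made only for illustration.

    import numpy as np

    # hypothetical action space with self.actions = [0, 1, 2, 3] and self.size = 4
    a_uniform = action_space.sample()                                # each action drawn with probability 1/4
    a_biased = action_space.sample(np.array([0.7, 0.1, 0.1, 0.1]))   # action 0 drawn 70% of the time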