def reset(self, uniform=False):
    # initializes an episode and returns the state of the agent
    # if uniform is set to False, the first state is drawn according to the P0 distribution,
    # else it is drawn from a uniform distribution over all the states
    if uniform:
        prob = np.ones(self.nb_states) / self.nb_states
        self.current_state = discreteProb(prob)
    else:
        self.current_state = discreteProb(self.P0)
    self.timestep = 0
    self.last_action_achieved = False
    return self.current_state
def step(self, u, deviation=0):
    # performs a step forward in the environment,
    # if you want to add some noise to the reward, give a value to the deviation param
    # which represents the standard deviation σ of the normal distribution used to draw the noise
    noise = 0  # = deviation * np.random.randn()  # generate noise, see an exercise in mbrl.ipynb
    # r is the reward of the transition, you can add some noise to it
    reward = self.r[self.current_state, u] + noise
    # the state reached when performing action u from state x is sampled
    # according to the discrete distribution self.P[x,u,:]
    observation = discreteProb(self.P[self.current_state, u, :])
    self.timestep += 1
    info = {}  # can be used when debugging
    info["State transition probabilities"] = self.P[self.current_state, u, :]
    info["reward's noise value"] = noise
    self.current_state = observation
    done = self.done()  # checks if the episode is over
    return [observation, reward, done, info]
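# Both reset() and step() rely on a discreteProb() helper that is not shown in
# this section. A minimal sketch of such a helper, assuming it takes a 1-D
# probability vector summing to 1 and returns the index of the sampled outcome
# (np.random.choice is one straightforward way to implement it):

import numpy as np

def discreteProb(p):
    # Draws one sample from the discrete distribution described by p,
    # where p[i] is the probability of outcome i; returns the drawn index.
    return np.random.choice(len(p), p=p)

# An equivalent hand-rolled version would draw r ~ U(0, 1) and return the first
# index whose cumulative probability exceeds r.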
def reset(self, uniform=False):
    # initializes an episode
    # if uniform is set to False, the first state is drawn from the P0 distribution,
    # else it is drawn from a uniform distribution over all the states except for walls
    if uniform:
        prob = np.ones(self.observation_space.size) / (
            self.observation_space.size - len(self.observation_space.walls)
        )
        for state in self.observation_space.walls:
            prob[state] = 0.0
        self.current_state = discreteProb(prob)
    else:
        self.current_state = discreteProb(self.P0)
    self.timestep = 0
    self.last_action_achieved = False
    return self.current_state
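# To see why this construction yields a valid distribution, here is a quick
# standalone check with hypothetical values (5 states, walls at indices 1 and 3):
# each non-wall state gets probability 1/(size - n_walls), walls get 0,
# and the vector sums to 1.

import numpy as np

size = 5        # hypothetical number of states
walls = [1, 3]  # hypothetical wall indices

prob = np.ones(size) / (size - len(walls))
for state in walls:
    prob[state] = 0.0

print(prob)        # [0.3333 0.     0.3333 0.     0.3333]
print(prob.sum())  # 1.0 -> a proper distribution over the non-wall states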
def step(self, u, deviation=0):
    # performs a step forward in the environment,
    # if you want to add some noise to the reward, give a value to the deviation param
    # which represents the standard deviation σ of the normal distribution used to draw the noise
    noise = deviation * np.random.randn()  # generate noise, useful for RTDP
    # r is the reward of the transition, you can add some noise to it
    reward = self.r[self.current_state, u] + noise
    # the state reached when performing action u from state x is sampled
    # according to the discrete distribution self.P[x,u,:]
    state = discreteProb(self.P[self.current_state, u, :])
    self.timestep += 1
    info = {
        "State transition probabilities": self.P[self.current_state, u, :],
        "reward's noise value": noise,
    }  # can be used when debugging
    self.current_state = state
    done = self.done()  # checks if the episode is over
    return [state, reward, done, info]
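# As a sanity check on the deviation parameter: multiplying a standard normal
# draw by deviation scales its standard deviation, not its mean. A quick
# numerical check (sample size chosen arbitrarily):

import numpy as np

deviation = 0.5
samples = deviation * np.random.randn(100_000)

print(samples.mean())  # close to 0.0
print(samples.std())   # close to 0.5, i.e. the deviation parameter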
def sample(self, prob_list=None):
    # returns an action drawn according to the prob_list distribution,
    # if the param is not set, then it is drawn from a uniform distribution
    if prob_list is None:
        prob_list = np.ones(self.size) / self.size
    index = discreteProb(prob_list)
    return self.actions[index]
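# Putting the pieces together, a minimal rollout sketch, under the assumption of
# an mdp object exposing the reset()/step() methods above and an action_space
# exposing sample() (the mdp and action_space names are illustrative, not from
# the source):

# Hypothetical usage: run one episode with a uniform-random policy.
state = mdp.reset(uniform=True)
done = False
cumulated_reward = 0.0

while not done:
    u = mdp.action_space.sample()            # random action
    state, reward, done, info = mdp.step(u)  # advance the environment
    cumulated_reward += reward

print(cumulated_reward)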