def step(self, action):
    """Advance the environment one step for *action*.

    Returns a Reward_observation_terminal whose reward and terminal flag
    are delegated to this environment's private helpers; the observation
    is an empty Observation (this environment exposes no state).
    """
    result = Reward_observation_terminal()
    result.r = self._calculate_reward(action)
    result.terminal = self._check_terminal(action)
    result.o = Observation()
    return result
def step(self, action):
    """Advance the environment one step for *action*.

    Always charges a step cost of -1.0 and returns an empty Observation.
    An action with an empty doubleArray is treated as the episode-ending
    signal (terminal=True); any non-empty action continues the episode.
    """
    return_ro = Reward_observation_terminal()
    return_ro.r = -1.0
    return_ro.o = Observation()
    # Direct boolean assignment replaces the redundant if/else that set
    # terminal to True/False explicitly — identical values, less code.
    return_ro.terminal = len(action.doubleArray) == 0
    return return_ro
def step(self, action):
    """Apply one of the four discrete moves and report the outcome.

    Validates that *action* carries exactly one integer in [0,3], moves
    the agent, and returns a Reward_observation_terminal holding the
    step reward, the flattened grid state, and the terminal flag.
    """
    # Make sure the action is valid
    assert len(action.intArray) == 1, "Expected 1 integer action."
    assert action.intArray[0] >= 0, "Expected action to be in [0,3]"
    assert action.intArray[0] < 4, "Expected action to be in [0,3]"

    self.update_position(action.intArray[0])

    response = Reward_observation_terminal()
    response.r = self.calculate_reward()
    observation = Observation()
    # Observation is the current position encoded as a single flat index.
    observation.intArray = [self.calculate_flat_state()]
    response.o = observation
    response.terminal = self.check_current_terminal()
    return response
def step(self, action):
    """Apply *action*, refresh the rendering, and return the RLGlue triple.

    Reward is -1 per step and 0 on the terminal step, optionally perturbed
    by zero-mean Gaussian noise when self._reward_noise > 0.  The current
    sensor vector is returned as the observation's doubleArray.
    """
    self._apply(action)
    self._render(self._sensors[0])

    response = Reward_observation_terminal()
    response.terminal = self._is_terminal()
    # Conditional expression replaces assign-then-overwrite: same values.
    response.r = .0 if response.terminal else -1.
    if self._reward_noise > 0:
        response.r += np.random.normal(scale=self._reward_noise)

    observation = Observation()
    observation.doubleArray = self._sensors.tolist()
    response.o = observation
    return response