コード例 #1
0
    def step(self, action):
        """Build the reward/observation/terminal result for one step.

        Args:
            action: forwarded unchanged to ``_calculate_reward`` and
                ``_check_terminal``.

        Returns:
            A ``Reward_observation_terminal`` whose ``r`` is the value from
            ``_calculate_reward``, whose ``o`` is a freshly constructed
            ``Observation``, and whose ``terminal`` is the value from
            ``_check_terminal``.
        """
        outcome = Reward_observation_terminal()

        # Evaluate the pieces in the same order they are attached below.
        reward = self._calculate_reward(action)
        observation = Observation()
        is_terminal = self._check_terminal(action)

        outcome.r, outcome.o, outcome.terminal = reward, observation, is_terminal
        return outcome
コード例 #2
0
    def step(self, action):
        """Return a fixed -1.0 reward and flag termination on an empty action.

        Args:
            action: object whose ``doubleArray`` attribute supports ``len``.

        Returns:
            A ``Reward_observation_terminal`` with ``r`` set to -1.0, ``o``
            set to a freshly constructed ``Observation``, and ``terminal``
            True exactly when ``action.doubleArray`` has length zero.
        """
        outcome = Reward_observation_terminal()
        outcome.r = -1.0
        outcome.o = Observation()
        # The length comparison already yields the boolean terminal flag.
        outcome.terminal = len(action.doubleArray) == 0
        return outcome
コード例 #3
0
    def step(self, action):
        """Validate a single integer action, apply it, and report the result.

        The action is passed to ``update_position``; the observation carries
        the value of ``calculate_flat_state`` as its only integer.

        Args:
            action: object exposing ``intArray``; it must contain exactly one
                integer in the range [0, 3].

        Returns:
            A ``Reward_observation_terminal`` with ``r`` from
            ``calculate_reward``, ``o`` holding the flat state, and
            ``terminal`` from ``check_current_terminal``.

        Raises:
            AssertionError: if ``action.intArray`` does not hold exactly one
                element, or that element lies outside [0, 3].
        """
        # Validate with explicit raises rather than ``assert`` statements so
        # the checks still run under ``python -O``. AssertionError and the
        # messages are kept so existing callers see the same failure.
        if len(action.intArray) != 1:
            raise AssertionError("Expected 1 integer action.")
        move = action.intArray[0]
        if not 0 <= move < 4:
            raise AssertionError("Expected action to be in [0,3]")

        self.update_position(move)

        obs = Observation()
        obs.intArray = [self.calculate_flat_state()]

        return_ro = Reward_observation_terminal()
        return_ro.r = self.calculate_reward()
        return_ro.o = obs
        return_ro.terminal = self.check_current_terminal()

        return return_ro
コード例 #4
0
ファイル: mountaincar.py プロジェクト: evenmarbles/rlpy
    def step(self, action):
        """Apply the action and return reward, sensor readings and terminal flag.

        The reward is -1. on a non-terminal step and .0 on a terminal one.
        When ``self._reward_noise`` is positive, a sample drawn from
        ``np.random.normal`` with that scale is added to the reward.

        Args:
            action: forwarded unchanged to ``_apply``.

        Returns:
            A ``Reward_observation_terminal`` with ``terminal`` taken from
            ``_is_terminal``, ``r`` as described above, and ``o`` set to an
            ``Observation`` whose ``doubleArray`` is ``self._sensors`` as a
            list.
        """
        outcome = Reward_observation_terminal()
        self._apply(action)
        # Only the first sensor value is handed to the renderer.
        self._render(self._sensors[0])

        done = self._is_terminal()
        outcome.terminal = done

        reward = .0 if done else -1.
        if self._reward_noise > 0:
            reward += np.random.normal(scale=self._reward_noise)
        outcome.r = reward

        observation = Observation()
        observation.doubleArray = self._sensors.tolist()
        outcome.o = observation

        return outcome