Example 1
    def _update_observation(self):
        # Build an Observation from the robot's current effector positions.
        return_obs = Observation()

        # One feature per (effector, axis) pair described by the MDP state.
        return_obs.doubleArray = np.zeros(MDPState.nfeatures)
        for effector, mapping in MDPState.description.items():
            pos = NaoWorldModel().get_effector_pos(effector)
            for key, axis in mapping.items():
                return_obs.doubleArray[axis] = pos[MDPState.key_to_index(key)]
        return return_obs
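
The nested loop assumes that MDPState.description maps each effector name to a {axis label: feature index} dict, and that MDPState.key_to_index turns an axis label into an index of the position vector returned by get_effector_pos. A minimal sketch of that indexing with stand-in data (all names and values below are assumptions for illustration, not taken from the original code):

    import numpy as np

    # Hypothetical stand-ins for the structures the loop above assumes.
    description = {"RArm": {"x": 0, "y": 1, "z": 2}}  # effector -> {axis label: feature index}
    key_to_index = {"x": 0, "y": 1, "z": 2}           # axis label -> index into the position vector
    pos = np.array([0.12, -0.30, 0.85])               # pretend effector position

    features = np.zeros(3)
    for key, axis in description["RArm"].items():
        features[axis] = pos[key_to_index[key]]
    # features is now [0.12, -0.30, 0.85]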
Example 2
    def start(self):
        if self._random_start:
            # Sample a uniformly random start state, rescaling each dimension
            # from [0, 1) into its [lower, upper] limit.
            self._sensors = np.random.random(self._sensors.shape)
            self._sensors *= (self._limits[:, 1] - self._limits[:, 0])
            self._sensors += self._limits[:, 0]
        else:
            # Fixed start state: all sensors zero except the first.
            self._sensors = np.zeros(self._sensors.shape)
            self._sensors[0] = -0.5

        self._render(self._sensors[0])

        return_obs = Observation()
        return_obs.doubleArray = self._sensors.tolist()
        return return_obs
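
The random-start branch relies on np.random.random sampling uniformly in [0, 1): multiplying by the per-dimension range and adding the lower bound gives a uniform draw in each [lower, upper) interval, assuming _limits is an (n, 2) array of [lower, upper] rows. A small sketch of that rescaling (the limits values here are made-up placeholders):

    import numpy as np

    # Hypothetical limits array: one [lower, upper] row per sensor.
    limits = np.array([[-1.2, 0.6],
                       [-0.07, 0.07]])

    sample = np.random.random(2)              # uniform in [0, 1)
    sample *= (limits[:, 1] - limits[:, 0])   # scale to the width of each range
    sample += limits[:, 0]                    # shift to start at the lower bound
    # Each entry of `sample` is now uniform in its [lower, upper) interval.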
Example 3
    def step(self, action):
        return_ro = Reward_observation_terminal()
        self._apply(action)
        self._render(self._sensors[0])

        return_ro.terminal = self._is_terminal()

        # Reward is -1 per step and 0 once the terminal state is reached.
        return_ro.r = -1.0
        if return_ro.terminal:
            return_ro.r = 0.0

        # Optionally corrupt the reward with zero-mean Gaussian noise.
        if self._reward_noise > 0:
            return_ro.r += np.random.normal(scale=self._reward_noise)

        obs = Observation()
        obs.doubleArray = self._sensors.tolist()
        return_ro.o = obs

        return return_ro
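
Taken together, start() and step() follow the usual episodic pattern: start() returns the first Observation, and step() returns a Reward_observation_terminal carrying the reward r, the next observation o, and the terminal flag. A minimal sketch of a driver loop written against that interface (the function name, the environment argument, and the random-action policy are placeholders, not part of the original examples):

    import numpy as np

    def run_episode(env, actions):
        # Run one episode against an environment exposing start()/step()
        # as in the examples above; returns the accumulated reward.
        env.start()
        total_reward = 0.0
        while True:
            ro = env.step(np.random.choice(actions))  # placeholder policy: uniform random
            total_reward += ro.r
            if ro.terminal:
                break
        return total_reward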