def _update_observation(self):
    """Assemble an observation from the current effector positions.

    A zero vector of ``MDPState.nfeatures`` entries is created, then for
    every ``effector -> mapping`` pair in ``MDPState.description`` the
    effector position is fetched from ``NaoWorldModel`` and, for every
    ``key -> axis`` pair of the mapping, the position component selected
    by ``MDPState.key_to_index(key)`` is written to slot ``axis``.

    Returns:
        An ``Observation`` whose ``doubleArray`` is the filled vector.
    """
    features = np.zeros(MDPState.nfeatures)

    for effector_name, axis_map in MDPState.description.iteritems():
        position = NaoWorldModel().get_effector_pos(effector_name)
        for component, slot in axis_map.iteritems():
            features[slot] = position[MDPState.key_to_index(component)]

    observation = Observation()
    observation.doubleArray = features
    return observation
def start(self):
    """Put the agent in its starting state and return the first observation.

    When ``self.fixed_start_state`` is truthy, the agent is placed at
    (``self.start_row``, ``self.start_col``) via ``set_agent_state``.  If
    that call reports the state as invalid, a warning is printed and
    ``set_random_state()`` is called instead.  When no fixed start state
    is configured, ``set_random_state()`` is called directly.

    Returns:
        An ``Observation`` whose ``intArray`` holds the single value
        returned by ``calculate_flat_state()``.
    """
    if self.fixed_start_state:
        state_valid = self.set_agent_state(self.start_row, self.start_col)
        if not state_valid:
            # Report the row AND the column; the message used to print the
            # row twice, hiding which column had actually been rejected.
            # A parenthesised single argument prints identically whether
            # ``print`` is the Python 2 statement or the Python 3 function.
            print("The fixed start state was NOT valid: "
                  + str(int(self.start_row)) + ","
                  + str(int(self.start_col)))
            self.set_random_state()
    else:
        self.set_random_state()

    return_obs = Observation()
    return_obs.intArray = [self.calculate_flat_state()]
    return return_obs
def start(self):
    """Reset the sensor vector, render it, and return the first observation.

    With ``self._random_start`` set, every sensor is drawn uniformly from
    the half-open interval spanned by column 0 and column 1 of
    ``self._limits``.  Otherwise all sensors are zero except the first,
    which is set to -0.5.  The first sensor value is then passed to
    ``self._render``.

    Returns:
        An ``Observation`` whose ``doubleArray`` is the sensor vector
        converted to a plain list.
    """
    if not self._random_start:
        sensors = np.zeros(self._sensors.shape)
        sensors[0] = -0.5
    else:
        sensors = np.random.random(self._sensors.shape)
        upper = self._limits[:, 1]
        lower = self._limits[:, 0]
        # Map the unit-interval samples onto [lower, upper), in place.
        sensors *= (upper - lower)
        sensors += lower
    self._sensors = sensors

    self._render(self._sensors[0])

    observation = Observation()
    observation.doubleArray = self._sensors.tolist()
    return observation
def step(self, action):
    """Apply one integer action and report the resulting transition.

    Args:
        action: object whose ``intArray`` holds exactly one integer in
            the range [0, 3].

    Returns:
        A ``Reward_observation_terminal`` carrying the reward from
        ``calculate_reward()``, an ``Observation`` whose ``intArray`` is
        ``[calculate_flat_state()]``, and the terminal flag from
        ``check_current_terminal()``.

    Raises:
        AssertionError: if the action is not a single integer in [0, 3].
    """
    # Validate the action before the position is touched.
    assert len(action.intArray) == 1, "Expected 1 integer action."
    move = action.intArray[0]
    assert 0 <= move < 4, "Expected action to be in [0,3]"

    self.update_position(move)

    observation = Observation()
    observation.intArray = [self.calculate_flat_state()]

    result = Reward_observation_terminal()
    result.o = observation
    result.r = self.calculate_reward()
    result.terminal = self.check_current_terminal()
    return result
def _update_observation(self):
    """Build and return the observation for the current displacement.

    The displacement is the difference between the y coordinates (index 1
    of the joint position) of the ankle-roll and hip-roll joints, scaled
    by 1000.

    * With ``self._feature_rep == 'rl'`` the observation's ``intArray`` is
      ``[image_x, displacement]``, where ``image_x`` is the x coordinate
      of the "ball" object's image centre.
    * Otherwise ``intArray`` is a one-hot list of ``MDPState.nfeatures``
      entries; the hot entry is the bin the displacement falls into,
      counted from ``MDPState.min_features`` in steps of
      ``self._bin_width``.

    Returns:
        The populated ``Observation``.

    Raises:
        SystemExit: if the displacement maps to a bin outside
            ``[0, MDPState.nfeatures)``.
    """
    return_obs = Observation()

    y_ankle = NaoWorldModel().get_joint_pos(self._ankle_roll)[1]
    y_hip = NaoWorldModel().get_joint_pos(self._hip_roll)[1]
    displacement = (y_ankle - y_hip) * 1000  # convert to millimeters

    if self._feature_rep == 'rl':
        image_x = NaoWorldModel().get_object("ball").image_center.x
        # NOTE(review): displacement is a float here although it is stored
        # in intArray -- confirm the consumer accepts or truncates it.
        return_obs.intArray = [image_x, displacement]
        return return_obs

    one_hot = [0] * MDPState.nfeatures
    bin_num = int(math.floor(
        (displacement - MDPState.min_features) / self._bin_width))

    # Check the range explicitly: a negative index would not raise
    # IndexError but silently wrap around and mark a bin at the far end.
    if not 0 <= bin_num < len(one_hot):
        # SystemExit is what the ``exit`` helper raises, without relying
        # on the ``site`` module having installed that helper.
        raise SystemExit(
            "displacement bin %d is outside the valid range [0, %d)"
            % (bin_num, len(one_hot)))

    one_hot[bin_num] = 1
    return_obs.intArray = one_hot
    # The observation used to be built and then dropped; hand it back.
    return return_obs
def step(self, action):
    """Advance the environment by one action and report the outcome.

    The action is handed to ``self._apply`` and the first sensor value is
    rendered.  The reward is -1 for a non-terminal step and 0 for a
    terminal one; when ``self._reward_noise`` is positive, a zero-mean
    normal sample with that scale is added to it.

    Args:
        action: the action object, passed unchanged to ``self._apply``.

    Returns:
        A ``Reward_observation_terminal`` with the terminal flag, the
        reward, and an ``Observation`` whose ``doubleArray`` is the
        sensor vector converted to a plain list.
    """
    outcome = Reward_observation_terminal()

    self._apply(action)
    self._render(self._sensors[0])

    done = self._is_terminal()
    reward = .0 if done else -1.
    if self._reward_noise > 0:
        reward += np.random.normal(scale=self._reward_noise)

    observation = Observation()
    observation.doubleArray = self._sensors.tolist()

    outcome.terminal = done
    outcome.r = reward
    outcome.o = observation
    return outcome
def get_Observation(self):
    """Return this object's data as an ``Observation``.

    The value of ``self.get_AbstractType()`` is passed to
    ``Observation.from_AbstractType`` and the result is returned.
    """
    abstract = self.get_AbstractType()
    return Observation.from_AbstractType(abstract)