def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
    assert isinstance(action, int)
    action = cast(int, action)

    action_str = self.action_names()[action]

    if action_str == END:
        self._took_end_action = True
        self._success = self._is_goal_in_range()
        self.last_action_success = self._success
    else:
        self.env.step({"action": action_str})
        self.last_action_success = self.env.last_action_success
        pose = self.env.agent_state()
        self.path.append({k: pose[k] for k in ["x", "y", "z"]})
        self.task_info["followed_path"].append(pose)

    if len(self.path) > 1:
        self.travelled_distance += IThorEnvironment.position_dist(
            p0=self.path[-1], p1=self.path[-2], ignore_y=True
        )

    step_result = RLStepResult(
        observation=self.get_observations(),
        reward=self.judge(),
        done=self.is_done(),
        info={"last_action_success": self.last_action_success, "action": action},
    )
    return step_result
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
    assert isinstance(action, int)
    action = cast(int, action)

    action_str = self.class_action_names()[action]

    if action_str == END:
        self._took_end_action = True
        self._success = self.is_goal_object_visible()
        self.last_action_success = self._success
    else:
        self.env.step({"action": action_str})
        self.last_action_success = self.env.last_action_success

        if (
            not self.last_action_success
        ) and self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE is not None:
            self.env.update_graph_with_failed_action(failed_action=action_str)

        self.task_info["followed_path"].append(self.env.get_agent_location())

    step_result = RLStepResult(
        observation=self.get_observations(),
        reward=self.judge(),
        done=self.is_done(),
        info={"last_action_success": self.last_action_success},
    )
    return step_result
def _step(self, action: Tuple[int, int]) -> RLStepResult:
    assert isinstance(action, tuple)

    action_str1 = self.action_names()[action[0]]
    action_str2 = self.action_names()[action[1]]

    self.env.step({"action": action_str1, "agentId": 0})
    self.last_action_success1 = self.env.last_action_success
    self.env.step({"action": action_str2, "agentId": 1})
    self.last_action_success2 = self.env.last_action_success

    pose1 = self.env.agent_state(0)
    self.task_info["followed_path1"].append(pose1)
    pose2 = self.env.agent_state(1)
    self.task_info["followed_path2"].append(pose2)

    self.last_geodesic_distance = self.env.distance_cache.find_distance(
        self.env.scene_name,
        {k: pose1[k] for k in ["x", "y", "z"]},
        {k: pose2[k] for k in ["x", "y", "z"]},
        self.env.distance_from_point_to_point,
    )

    step_result = RLStepResult(
        observation=self.get_observations(),
        reward=self.judge(),
        done=self.is_done(),
        info={
            "last_action_success": [
                self.last_action_success1,
                self.last_action_success2,
            ],
            "action": action,
        },
    )
    return step_result
def _step(self, action: int) -> RLStepResult: action_str = self.class_action_names()[action] self._last_action_str = action_str action_dict = {"action": action_str} object_id = self.task_info["objectId"] if action_str == PICKUP: action_dict = {**action_dict, "object_id": object_id} self.env.step(action_dict) self.last_action_success = self.env.last_action_success last_action_name = self._last_action_str last_action_success = float(self.last_action_success) self.action_sequence_and_success.append((last_action_name, last_action_success)) self.visualize(last_action_name) # If the object has not been picked up yet and it was picked up in the previous step update parameters to integrate it into reward if not self.object_picked_up: if ( object_id in self.env.controller.last_event.metadata["arm"]["pickupableObjects"] ): self.env.step(dict(action="PickupObject")) # we are doing an additional pass here, label is not right and if we fail we will do it twice object_inventory = self.env.controller.last_event.metadata["arm"][ "heldObjects" ] if len(object_inventory) > 0 and object_id not in object_inventory: self.env.step(dict(action="ReleaseObject")) if self.env.is_object_at_low_level_hand(object_id): self.object_picked_up = True self.eplen_pickup = ( self._num_steps_taken + 1 ) # plus one because this step has not been counted yet if self.object_picked_up: object_state = self.env.get_object_by_id(object_id) goal_state = self.task_info["target_location"] goal_achieved = self.object_picked_up and self.obj_state_aproximity( object_state, goal_state ) if goal_achieved: self._took_end_action = True self.last_action_success = goal_achieved self._success = goal_achieved step_result = RLStepResult( observation=self.get_observations(), reward=self.judge(), done=self.is_done(), info={"last_action_success": self.last_action_success}, ) return step_result
def _step(self, action: Sequence[float]) -> RLStepResult:
    action = np.array(action)

    gym_obs, reward, self._gym_done, info = self.env.step(action=action)

    return RLStepResult(
        observation=self.get_observations(gym_obs=gym_obs),
        reward=reward,
        done=self.is_done(),
        info=info,
    )
def step(self, action: int) -> RLStepResult:
    step_result = super().step(action=action)

    if self.greedy_expert is not None:
        self.greedy_expert.update(
            action_taken=action,
            action_success=step_result.info["action_success"],
        )

    step_result = RLStepResult(
        observation=self.get_observations(),
        reward=step_result.reward,
        done=step_result.done,
        info=step_result.info,
    )
    return step_result
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
    assert isinstance(action, int)
    action = cast(int, action)

    action_str = self.action_names()[action]

    if action_str == END:
        self._took_end_action = True
        self._success = self._is_goal_in_range()
        self.last_action_success = self._success
    elif action_str in [
        DIRECTIONAL_AHEAD_PUSH,
        DIRECTIONAL_BACK_PUSH,
        DIRECTIONAL_RIGHT_PUSH,
        DIRECTIONAL_LEFT_PUSH,
    ]:
        angle = [0.001, 180, 90, 270][action - 5]
        obj = self.env.moveable_closest_obj_by_types(self.task_info["obstacles_types"])
        if obj is not None:
            self.env.step({
                "action": action_str,
                "objectId": obj["objectId"],
                "moveMagnitude": obj["mass"] * 100,
                "pushAngle": angle,
                "autoSimulation": False,
            })
            self.last_action_success = self.env.last_action_success
        else:
            self.last_action_success = False
    elif action_str in [LOOK_UP, LOOK_DOWN]:
        self.env.step({"action": action_str})
        self.last_action_success = self.env.last_action_success
    else:
        self.env.step({"action": action_str})
        self.last_action_success = self.env.last_action_success

    pose = self.env.agent_state()
    self.path.append({k: pose[k] for k in ["x", "y", "z"]})
    self.task_info["followed_path"].append(pose)
    if len(self.path) > 1 and self.path[-1] != self.path[-2]:
        self.num_moves_made += 1

    step_result = RLStepResult(
        observation=self.get_observations(),
        reward=self.judge(),
        done=self.is_done(),
        info={"last_action_success": self.last_action_success, "action": action},
    )
    return step_result
def _step(self, action: int) -> RLStepResult:
    assert isinstance(action, int)

    minigrid_obs, reward, done, info = self.env.step(action=action)
    self._last_action = action

    self._was_successful = done and reward > 0

    return RLStepResult(
        observation=self.get_observations(minigrid_output_obs=minigrid_obs),
        reward=reward,
        done=self.is_done(),
        info=info,
    )
def _step(self, action: int) -> RLStepResult: """Take a step in the task. # Parameters action: is the index of the action from self.action_names() """ # parse the action data action_name = self.action_names()[action] if action_name.startswith("pickup"): # Don't allow the exploration agent to pickup objects action_success = False elif action_name.startswith("open_by_type"): # Don't allow the exploration agent to open objects action_success = False elif action_name.startswith( ("move", "rotate", "look", "stand", "crouch")): # take the movement action action_success = getattr(self.walkthrough_env, action_name)() elif action_name == "drop_held_object_with_snap": # Don't allow the exploration agent to drop objects (not that it can hold any) action_success = False elif action_name == "done": self._took_end_action = True action_success = True else: raise RuntimeError( f"Action '{action_name}' is not in the action space {RearrangeActionSpace}" ) self.actions_taken.append(action_name) self.actions_taken_success.append(action_success) return RLStepResult( observation=self.get_observations(), reward=self._judge(action_name=action_name, action_success=action_success), done=self.is_done(), info={ "action_name": action_name, "action_success": action_success }, )
def _step(self, action: int) -> RLStepResult:
    assert isinstance(action, int)
    action = cast(int, action)

    minigrid_obs, reward, self._minigrid_done, info = self.env.step(
        action=self._ACTION_IND_TO_MINIGRID_IND[action]
    )

    # self.env.render()

    return RLStepResult(
        observation=self.get_observations(minigrid_output_obs=minigrid_obs),
        reward=reward,
        done=self.is_done(),
        info=info,
    )
def _step(self, action: Dict[str, int]) -> RLStepResult:
    assert len(action) == 2, "got action={}".format(action)

    minigrid_obs, reward, self._minigrid_done, info = self.env.step(
        action=self._ACTION_IND_TO_MINIGRID_IND[
            action["lower"] + 2 * action["higher"]
        ]
    )

    # self.env.render()

    return RLStepResult(
        observation=self.get_observations(minigrid_output_obs=minigrid_obs),
        reward=reward,
        done=self.is_done(),
        info=info,
    )
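In the snippet above, the two-component action dict is flattened into a single index via `action["lower"] + 2 * action["higher"]`, i.e. a base-2 encoding where `lower` is the least-significant digit. Assuming each component is binary (which the factor of 2 implies but the excerpt does not state), a small sketch of the mapping looks like this; `flatten_action` is a hypothetical helper, not part of the excerpted code:

def flatten_action(action: dict) -> int:
    # Base-2 encoding: "lower" is the least-significant digit.
    return action["lower"] + 2 * action["higher"]

for higher in (0, 1):
    for lower in (0, 1):
        a = {"lower": lower, "higher": higher}
        print(a, "->", flatten_action(a))
# {'lower': 0, 'higher': 0} -> 0
# {'lower': 1, 'higher': 0} -> 1
# {'lower': 0, 'higher': 1} -> 2
# {'lower': 1, 'higher': 1} -> 3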
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult: assert isinstance(action, int) action = cast(int, action) old_pos = self.get_observations()["agent_position_and_rotation"] action_str = self.action_names()[action] self._actions_taken.append(action_str) self.env.step({"action": action_str}) # if action_str != END: # self.env.step({"action": action_str}) # if self.env.env.get_metrics()['distance_to_goal'] <= 0.2: # self._took_end_action = True # self._success = self.env.env.get_metrics()['distance_to_goal'] <= 0.2 # self.last_action_success = self._success # else: # self.last_action_success = self.env.last_action_success if action_str == END: self._took_end_action = True self._success = self._is_goal_in_range() self.last_action_success = self._success else: self.last_action_success = self.env.last_action_success step_result = RLStepResult( observation=self.get_observations(), reward=self.judge(), done=self.is_done(), info={"last_action_success": self.last_action_success}, ) new_pos = self.get_observations()["agent_position_and_rotation"] if np.all(old_pos == new_pos): self._num_invalid_actions += 1 pos = self.get_observations()["agent_position_and_rotation"] self._positions.append({ "x": pos[0], "y": pos[1], "z": pos[2], "rotation": pos[3] }) return step_result
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
    assert isinstance(action, int)
    action = cast(int, action)

    self.env.step(action)

    reward = STEP_PENALTY
    if np.all(self.env.current_position == self.env.goal_position):
        self._found_target = True
        reward += FOUND_TARGET_REWARD
    elif self.num_steps_taken() == self.max_steps - 1:
        reward = STEP_PENALTY / (1 - DISCOUNT_FACTOR)

    return RLStepResult(
        observation=self.get_observations(),
        reward=reward,
        done=self.is_done(),
        info=None,
    )
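The timeout branch above assigns `STEP_PENALTY / (1 - DISCOUNT_FACTOR)`, the closed form of an infinite geometric series of discounted step penalties: running out of time is penalized as if the agent kept paying the per-step penalty forever. A quick numerical check of that equivalence, using illustrative constants since the real values are defined elsewhere in that codebase:

# Illustrative constants; the actual values live in the task's constants module.
STEP_PENALTY = -0.01
DISCOUNT_FACTOR = 0.99

closed_form = STEP_PENALTY / (1 - DISCOUNT_FACTOR)
partial_sum = sum(STEP_PENALTY * DISCOUNT_FACTOR ** t for t in range(10_000))

print(closed_form)            # -1.0
print(round(partial_sum, 6))  # approaches -1.0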
def _step(self, action: int) -> RLStepResult: action_str = self.class_action_names()[action] self._last_action_str = action_str action_dict = {"action": action_str} object_id = self.task_info["objectId"] if action_str == PICKUP: action_dict = {**action_dict, "object_id": object_id} self.env.step(action_dict) self.last_action_success = self.env.last_action_success last_action_name = self._last_action_str last_action_success = float(self.last_action_success) self.action_sequence_and_success.append( (last_action_name, last_action_success)) self.visualize(last_action_name) # If the object has not been picked up yet and it was picked up in the previous step update parameters to integrate it into reward if not self.object_picked_up: if self.env.is_object_at_low_level_hand(object_id): self.object_picked_up = True self.eplen_pickup = ( self._num_steps_taken + 1 ) # plus one because this step has not been counted yet if action_str == DONE: self._took_end_action = True object_state = self.env.get_object_by_id(object_id) goal_state = self.task_info["target_location"] goal_achieved = self.object_picked_up and self.obj_state_aproximity( object_state, goal_state) self.last_action_success = goal_achieved self._success = goal_achieved step_result = RLStepResult( observation=self.get_observations(), reward=self.judge(), done=self.is_done(), info={"last_action_success": self.last_action_success}, ) return step_result
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult: assert isinstance(action, int) action = cast(int, action) action_str = self.class_action_names()[action] self.env.step({"action": action_str}) if action_str == END: self._took_end_action = True self._success = self._is_goal_in_range() self.last_action_success = self._success else: self.last_action_success = self.env.last_action_success step_result = RLStepResult( observation=self.get_observations(), reward=self.judge(), done=self.is_done(), info={"last_action_success": self.last_action_success}, ) return step_result
def _step(self, action: int) -> RLStepResult: """ action : is the index of the action from self.action_names() """ # parse the action data action_name = self.action_names()[action] if action_name.startswith("pickup"): # NOTE: due to the object_id's not being in the metadata for speedups, # they cannot be targeted with interactible actions. Hence, why # we're resetting the object filter before targeting by object id. with include_object_data(self.unshuffle_env.controller): metadata = self.unshuffle_env.last_event.metadata if len(metadata["inventoryObjects"]) != 0: action_success = False else: object_type = stringcase.pascalcase( action_name.replace("pickup_", "")) possible_objects = [ o for o in metadata["objects"] if o["visible"] and o["objectType"] == object_type ] possible_objects = sorted(possible_objects, key=lambda po: (po["distance"], po["name"])) object_before = None if len(possible_objects) > 0: object_before = possible_objects[0] object_id = object_before["objectId"] if object_before is not None: self.unshuffle_env.controller.step( "PickupObject", objectId=object_id, **self.unshuffle_env.physics_step_kwargs, ) action_success = self.unshuffle_env.last_event.metadata[ "lastActionSuccess"] else: action_success = False if action_success and self.unshuffle_env.held_object is None: get_logger().warning( f"`PickupObject` was successful in picking up {object_id} but we're not holding" f" any objects! Current task spec:\n{self.unshuffle_env.current_task_spec}." ) action_success = False elif action_name.startswith("open_by_type"): object_type = stringcase.pascalcase( action_name.replace("open_by_type_", "")) with include_object_data(self.unshuffle_env.controller): obj_name_to_goal_and_cur_poses = { cur_pose["name"]: (goal_pose, cur_pose) for _, goal_pose, cur_pose in zip( *self.unshuffle_env.poses) } goal_pose = None cur_pose = None for o in self.unshuffle_env.last_event.metadata["objects"]: if (o["visible"] and o["objectType"] == object_type and o["openable"] and not self.unshuffle_env.are_poses_equal( *obj_name_to_goal_and_cur_poses[o["name"]])): goal_pose, cur_pose = obj_name_to_goal_and_cur_poses[ o["name"]] break if goal_pose is not None: object_id = cur_pose["objectId"] goal_openness = goal_pose["openness"] if cur_pose["openness"] > 0.0: self.unshuffle_env.controller.step( "CloseObject", objectId=object_id, **self.unshuffle_env.physics_step_kwargs, ) self.unshuffle_env.controller.step( "OpenObject", objectId=object_id, openness=goal_openness, **self.unshuffle_env.physics_step_kwargs, ) action_success = self.unshuffle_env.last_event.metadata[ "lastActionSuccess"] else: action_success = False elif action_name.startswith( ("move", "rotate", "look", "stand", "crouch")): # apply to only the unshuffle env as the walkthrough agent's position # must now be managed by the whichever sensor is trying to read data from it. 
action_success = getattr(self.unshuffle_env, action_name)() elif action_name == "drop_held_object_with_snap": action_success = getattr(self.unshuffle_env, action_name)() elif action_name == "done": self._took_end_action = True action_success = True elif action_name == "pass": action_success = True else: raise RuntimeError( f"Action '{action_name}' is not in the action space {RearrangeActionSpace}" ) self.actions_taken.append(action_name) self.actions_taken_success.append(action_success) if self.task_spec_in_metrics: self.agent_locs.append(self.unshuffle_env.get_agent_location()) return RLStepResult( observation=None, reward=self._judge(), done=self.is_done(), info={ "action_name": action_name, "action_success": action_success }, )
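All of the `_step` variants above follow the same contract: map an action index (or structured action) to an environment command, record whether it succeeded, and package the observation, reward, done flag, and info dict into an `RLStepResult`. The following is a minimal, dependency-free sketch of that shared pattern, not any one of the tasks above: `RLStepResult` is redefined locally as a stand-in `NamedTuple` rather than imported from the framework, and `ToyEnv`, `ToyTask`, `MOVE`, and `END` are hypothetical names introduced only for illustration.

from typing import Any, Dict, NamedTuple, Optional

# Stand-in for the framework's RLStepResult (observation, reward, done, info).
class RLStepResult(NamedTuple):
    observation: Dict[str, Any]
    reward: float
    done: bool
    info: Optional[Dict[str, Any]]

MOVE, END = "MoveAhead", "End"  # hypothetical action names

class ToyEnv:
    """Hypothetical 1-D environment: the agent walks toward position `goal`."""
    def __init__(self, goal: int = 3):
        self.position, self.goal = 0, goal
        self.last_action_success = True

    def step(self, action: Dict[str, str]) -> None:
        if action["action"] == MOVE:
            self.position += 1
        self.last_action_success = True

class ToyTask:
    """Minimal task mirroring the `_step` pattern used above."""
    def __init__(self, env: ToyEnv, max_steps: int = 10):
        self.env, self.max_steps = env, max_steps
        self.num_steps = 0
        self._took_end_action = False
        self._success = False
        self.last_action_success = True

    def action_names(self):
        return [MOVE, END]

    def _is_goal_in_range(self) -> bool:
        return self.env.position >= self.env.goal

    def is_done(self) -> bool:
        return self._took_end_action or self.num_steps >= self.max_steps

    def judge(self) -> float:
        # Step penalty plus a terminal bonus on success, as in several tasks above.
        return -0.01 + (1.0 if self._took_end_action and self._success else 0.0)

    def _step(self, action: int) -> RLStepResult:
        action_str = self.action_names()[action]
        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            self.env.step({"action": action_str})
            self.last_action_success = self.env.last_action_success
        self.num_steps += 1
        return RLStepResult(
            observation={"position": self.env.position},
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success, "action": action},
        )

if __name__ == "__main__":
    task = ToyTask(ToyEnv())
    for a in [0, 0, 0, 1]:  # move three times, then signal End
        result = task._step(a)
        print(result.reward, result.done, result.info)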