def reset(self, start_pose = None, target_pose = None): """Environment reset. Args: start_pose (list[3] or np.array[3]): [x,y,yaw] initial robot position. target_pose (list[3] or np.array[3]): [x,y,yaw] target robot position. Returns: np.array: Environment state. """ self.elapsed_steps = 0 self.prev_base_reward = None # Initialize environment state self.state = np.zeros(self._get_env_state_len()) rs_state = np.zeros(self._get_robot_server_state_len()) # Set Robot starting position if start_pose: assert len(start_pose)==3 else: start_pose = self._get_start_pose() rs_state[3:6] = start_pose # Set target position if target_pose: assert len(target_pose)==3 else: target_pose = self._get_target(start_pose) rs_state[0:3] = target_pose # Generate obstacles positions self._generate_obstacles_positions() rs_state[1021:1024] = self.sim_obstacles[0] rs_state[1024:1027] = self.sim_obstacles[1] rs_state[1027:1030] = self.sim_obstacles[2] # Set initial state of the Robot Server state_msg = robot_server_pb2.State(state = rs_state.tolist()) if not self.client.set_state_msg(state_msg): raise RobotServerError("set_state") # Get Robot Server state rs_state = copy.deepcopy(np.nan_to_num(np.array(self.client.get_state_msg().state))) # Check if the length of the Robot Server state received is correct if not len(rs_state)== self._get_robot_server_state_len(): raise InvalidStateError("Robot Server state received has wrong length") # Convert the initial state from Robot Server format to environment format self.state = self._robot_server_state_to_env_state(rs_state) # Check if the environment state is contained in the observation space if not self.observation_space.contains(self.state): raise InvalidStateError() return self.state
def reset(self, initial_joint_positions=None, ee_target_pose=None): """Environment reset. Args: initial_joint_positions (list[6] or np.array[6]): robot joint positions in radians. ee_target_pose (list[6] or np.array[6]): [x,y,z,r,p,y] target end effector pose. Returns: np.array: Environment state. """ self.elapsed_steps = 0 self.last_action = None self.prev_base_reward = None # Initialize environment state self.state = np.zeros(self._get_env_state_len()) rs_state = np.zeros(self._get_robot_server_state_len()) # Set initial robot joint positions if initial_joint_positions: assert len(initial_joint_positions) == 6 ur10_initial_joint_positions = initial_joint_positions else: ur10_initial_joint_positions = self._get_initial_joint_positions() rs_state[6:12] = self.ur10._ur_10_joint_list_to_ros_joint_list( ur10_initial_joint_positions) # Set target End Effector pose if ee_target_pose: assert len(ee_target_pose) == 6 else: ee_target_pose = self._get_target_pose() rs_state[0:6] = ee_target_pose # Set initial state of the Robot Server if not self.client.set_state(copy.deepcopy(rs_state.tolist())): raise RobotServerError("set_state") # Get Robot Server state rs_state = copy.deepcopy( np.nan_to_num(np.array(self.client.get_state()))) # Check if the length of the Robot Server state received is correct if not len(rs_state) == self._get_robot_server_state_len(): raise InvalidStateError( "Robot Server state received has wrong length") # Convert the initial state from Robot Server format to environment format self.state = self._robot_server_state_to_env_state(rs_state) # Check if the environment state is contained in the observation space if not self.observation_space.contains(self.state): raise InvalidStateError() return self.state
def reset(self, joint_positions=None) -> np.array: """Environment reset. Args: joint_positions (list[6] or np.array[6]): robot joint positions in radians. Order is defined by Returns: np.array: Environment state. """ if joint_positions: assert len(joint_positions) == 6 else: joint_positions = JOINT_POSITIONS self.elapsed_steps = 0 # Initialize environment state state_len = self.observation_space.shape[0] state = np.zeros(state_len) rs_state = dict.fromkeys(self.get_robot_server_composition(), 0.0) # Set initial robot joint positions self._set_joint_positions(joint_positions) # Update joint positions in rs_state rs_state.update(self.joint_positions) # Set initial state of the Robot Server state_msg = self._set_initial_robot_server_state(rs_state) if not self.client.set_state_msg(state_msg): raise RobotServerError("set_state") # Get Robot Server state rs_state = self.client.get_state_msg().state_dict # Check if the length and keys of the Robot Server state received is correct self._check_rs_state_keys(rs_state) # Convert the initial state from Robot Server format to environment format state = self._robot_server_state_to_env_state(rs_state) # Check if the environment state is contained in the observation space if not self.observation_space.contains(state): raise InvalidStateError() # Check if current position is in the range of the initial joint positions for joint in self.joint_positions.keys(): if not np.isclose( self.joint_positions[joint], rs_state[joint], atol=0.05): raise InvalidStateError( 'Reset joint positions are not within defined range') self.rs_state = rs_state return state
def _check_rs_state_keys(self, rs_state) -> None: keys = self.get_robot_server_composition() if not len(keys) == len(rs_state.keys()): raise InvalidStateError( "Robot Server state keys to not match. Different lengths.") for key in keys: if key not in rs_state.keys(): raise InvalidStateError("Robot Server state keys to not match")
def step(self, action): self.elapsed_steps += 1 # Check if the action is within the action space assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action)) # Convert environment action to Robot Server action rs_action = copy.deepcopy(action) # Scale action rs_action = np.multiply(action, self.max_vel) # Send action to Robot Server if not self.client.send_action(rs_action.tolist()): raise RobotServerError("send_action") # Get state from Robot Server rs_state = self.client.get_state_msg().state # Convert the state from Robot Server format to environment format self.state = self._robot_server_state_to_env_state(rs_state) # Check if the environment state is contained in the observation space if not self.observation_space.contains(self.state): raise InvalidStateError() # Assign reward reward, done, info = self._reward(rs_state=rs_state, action=action) return self.state, reward, done, info
def step(self, action) -> Tuple[np.array, float, bool, dict]: if type(action) == list: action = np.array(action) self.elapsed_steps += 1 # Check if the action is contained in the action space if not self.action_space.contains(action): raise InvalidActionError() # Send action to Robot Server and get state rs_state = self.client.send_action_get_state( action.tolist()).state_dict self._check_rs_state_keys(rs_state) # Convert the state from Robot Server format to environment format state = self._robot_server_state_to_env_state(rs_state) # Check if the environment state is contained in the observation space if not self.observation_space.contains(state): raise InvalidStateError() self.rs_state = rs_state # Assign reward reward = 0 done = False reward, done, info = self.reward(rs_state=rs_state, action=action) if self.rs_state_to_info: info['rs_state'] = self.rs_state return state, reward, done, info
def step(self, action): # Check if the action is within the action space assert self.action_space.contains( action), "%r (%s) invalid" % (action, type(action)) # Convert environment action to Robot Server action rs_action = copy.deepcopy(action) # Scale action rs_action = np.multiply(rs_action, self.abs_joint_pos_range) # Convert action indexing from ur10 to ros rs_action = self.ur10._ur_10_joint_list_to_ros_joint_list(rs_action) # Send action to Robot Server if not self.client.send_action(rs_action.tolist()): raise RobotServerError("send_action") # Get state from Robot Server rs_state = self.client.get_state() # Convert the state from Robot Server format to environment format self.state = self._robot_server_state_to_env_state(rs_state) # Check if the environment state is contained in the observation space if not self.observation_space.contains(self.state): raise InvalidStateError() # Assign reward reward = 0 done = False reward, done, info = self._reward(rs_state=rs_state, action=action) return self.state, reward, done, info
def step(self, action): self.elapsed_steps += 1 # Check if the action is within the action space assert self.action_space.contains( action), "%r (%s) invalid" % (action, type(action)) # action = np.append(action, [0.0]) # Convert environment action to Robot Server action # rs_action = copy.deepcopy(action) # Scale action # action = np.multiply(action, self.abs_joint_pos_range[1:4]) # convert action # current_joint_positions = self.ur5._ros_joint_list_to_ur5_joint_list(self.prev_rs_state[6:12])[1:4] # print('current joint positions:', current_joint_positions) # print('action to take:', action) # rs_action = action + current_joint_positions # print('converted action:', action) # Convert environment action to Robot Server action initial_joint_positions = self._get_initial_joint_positions() # print(initial_joint_positions) initial_joint_positions[1:4] = initial_joint_positions[1:4] + action # print(initial_joint_positions) rs_action = initial_joint_positions # rs_action = np.array([initial_joint_positions[0], rs_action[0], rs_action[1], rs_action[2], initial_joint_positions[4], initial_joint_positions[5]]) # Convert action indexing from ur5 to ros rs_action = self.ur5._ur_5_joint_list_to_ros_joint_list(rs_action) # Send action to Robot Server if not self.client.send_action(rs_action.tolist()): raise RobotServerError("send_action") # Get state from Robot Server rs_state = self.client.get_state_msg().state self.prev_rs_state = copy.deepcopy(rs_state) # print('rs_state joints', rs_state[6:12]) # Convert the state from Robot Server format to environment format self.state = self._robot_server_state_to_env_state(rs_state) # Check if the environment state is contained in the observation space if not self.observation_space.contains(self.state): raise InvalidStateError() # Assign reward reward = 0 done = False reward, done, info = self._reward(rs_state=rs_state, action=action) return self.state, reward, done, info
def reset(self): self.last_action = None self.prev_base_reward = None # Initialize environment state self.state = np.zeros(self._get_env_state_len()) rs_state = np.zeros(self._get_robot_server_state_len()) # Set initial robot joint positions ur10_initial_joint_positions = self._get_initial_joint_positions() rs_state[6:12] = self.ur10._ur_10_joint_list_to_ros_joint_list( ur10_initial_joint_positions) # Generate target End Effector pose rs_state[0:6] = self.ur10.get_random_workspace_pose() # Set initial state of the Robot Server if not self.client.set_state(copy.deepcopy(rs_state.tolist())): raise RobotServerError("set_state") # Get Robot Server state rs_state = copy.deepcopy( np.nan_to_num(np.array(self.client.get_state()))) # Check if the length of the Robot Server state received is correct if not len(rs_state) == self._get_robot_server_state_len(): raise InvalidStateError( "Robot Server state received has wrong length") # Convert the initial state from Robot Server format to environment format self.state = self._robot_server_state_to_env_state(rs_state) # Check if the environment state is contained in the observation space if not self.observation_space.contains(self.state): raise InvalidStateError() return self.state
def reset(self): self.prev_base_reward = None self.steps_in_goal = 0 # Initialize environment state self.state = np.zeros(self._get_env_state_len()) rs_state = np.zeros(self._get_robot_server_state_len()) # Set Robot starting position start_pose = self._get_start_pose() rs_state[3:6] = start_pose # Generate target position target = self._get_target(start_pose) rs_state[0:3] = target # Set initial state of the Robot Server if not self.client.set_state(copy.deepcopy(rs_state.tolist())): raise RobotServerError("set_state") # Get Robot Server state rs_state = copy.deepcopy( np.nan_to_num(np.array(self.client.get_state()))) # Check if the length of the Robot Server state received is correct if not len(rs_state) == self._get_robot_server_state_len(): raise InvalidStateError( "Robot Server state received has wrong length") # Convert the initial state from Robot Server format to environment format self.state = self._robot_server_state_to_env_state(rs_state) # Check if the environment state is contained in the observation space if not self.observation_space.contains(self.state): raise InvalidStateError() return self.state
def reset(self, position=None) -> np.array: """Environment reset. Args: position (list[2] or np.array[2]): [x,y] initial robot position. Returns: np.array: Environment state. """ # Set Robot starting position if position: assert len(position) == 2 else: position = [0, 0] self.elapsed_steps = 0 # Initialize environment state state_len = self.observation_space.shape[0] state = np.zeros(state_len) rs_state = dict.fromkeys(self.get_robot_server_composition(), 0.0) # Fill rs_state rs_state['pos_x'] = position[0] rs_state['pos_y'] = position[1] # Set initial state of the Robot Server state_msg = self._set_initial_robot_server_state(rs_state) if not self.client.set_state_msg(state_msg): raise RobotServerError("set_state") # Get Robot Server state rs_state = self.client.get_state_msg().state_dict # Check if the length and keys of the Robot Server state received is correct self._check_rs_state_keys(rs_state) # Convert the initial state from Robot Server format to environment format state = self._robot_server_state_to_env_state(rs_state) # Check if the environment state is contained in the observation space if not self.observation_space.contains(state): raise InvalidStateError() self.rs_state = rs_state return state
def reset(self, initial_joint_positions=None, type='random'): """Environment reset. Args: initial_joint_positions (list[6] or np.array[6]): robot joint positions in radians. ee_target_pose (list[6] or np.array[6]): [x,y,z,r,p,y] target end effector pose. Returns: np.array: Environment state. """ self.elapsed_steps = 0 self.last_action = None self.prev_base_reward = None # Initialize environment state self.state = np.zeros(self._get_env_state_len()) rs_state = np.zeros(self._get_robot_server_state_len()) # Set initial robot joint positions if initial_joint_positions: assert len(initial_joint_positions) == 6 ur5_initial_joint_positions = initial_joint_positions elif (len(self.last_position_on_success) != 0) and (type == 'continue'): ur5_initial_joint_positions = self.last_position_on_success else: ur5_initial_joint_positions = self._get_initial_joint_positions() rs_state[6:12] = self.ur5._ur_5_joint_list_to_ros_joint_list( ur5_initial_joint_positions) # Set initial state of the Robot Server # z_amplitude = 0.25 # z_frequency = 0.125 # z_offset = 0.35 z_amplitude = np.random.default_rng().uniform(low=0.09, high=0.35) z_frequency = 0.125 z_offset = np.random.default_rng().uniform(low=0.2, high=0.6) float_params = { "x": 0.13, "y": -0.30, "z_amplitude": z_amplitude, "z_frequency": z_frequency, "z_offset": z_offset } state_msg = robot_server_pb2.State(state=rs_state.tolist(), float_params=float_params) if not self.client.set_state_msg(state_msg): raise RobotServerError("set_state") # Get Robot Server state rs_state = copy.deepcopy( np.nan_to_num(np.array(self.client.get_state_msg().state))) self.prev_rs_state = copy.deepcopy(rs_state) # Check if the length of the Robot Server state received is correct if not len(rs_state) == self._get_robot_server_state_len(): raise InvalidStateError( "Robot Server state received has wrong length") # Convert the initial state from Robot Server format to environment format self.state = self._robot_server_state_to_env_state(rs_state) # save start position self.start_position = self.state[3:9] # Check if the environment state is contained in the observation space if not self.observation_space.contains(self.state): raise InvalidStateError() # check if current position is in the range of the initial joint positions if (len(self.last_position_on_success) == 0) or (type == 'random'): joint_positions = self.ur5._ros_joint_list_to_ur5_joint_list( rs_state[6:12]) tolerance = 0.1 for joint in range(len(joint_positions)): if (joint_positions[joint] + tolerance < self.initial_joint_positions_low[joint]) or ( joint_positions[joint] - tolerance > self.initial_joint_positions_high[joint]): raise InvalidStateError( 'Reset joint positions are not within defined range') # go one empty action and check if there is a collision action = self.state[4:7] _, _, done, info = self.step(action) self.elapsed_steps = 0 if done: raise InvalidStateError('Reset started in a collision state') return self.state
def reset(self, initial_joint_positions=None, ee_target_pose=None, type='random'): """Environment reset. Args: initial_joint_positions (list[6] or np.array[6]): robot joint positions in radians. ee_target_pose (list[6] or np.array[6]): [x,y,z,r,p,y] target end effector pose. Returns: np.array: Environment state. """ self.elapsed_steps = 0 self.last_action = None self.prev_base_reward = None # Initialize environment state self.state = np.zeros(self._get_env_state_len()) rs_state = np.zeros(self._get_robot_server_state_len()) print(self.last_position_on_success) # Set initial robot joint positions if initial_joint_positions: assert len(initial_joint_positions) == 6 ur5_initial_joint_positions = initial_joint_positions elif (len(self.last_position_on_success) != 0) and (type == 'continue'): ur5_initial_joint_positions = self.last_position_on_success else: ur5_initial_joint_positions = self._get_initial_joint_positions() rs_state[6:12] = self.ur5._ur_5_joint_list_to_ros_joint_list( ur5_initial_joint_positions) # Set target End Effector pose if ee_target_pose: assert len(ee_target_pose) == 6 else: ee_target_pose = self._get_target_pose() rs_state[0:6] = ee_target_pose # Set initial state of the Robot Server state_msg = robot_server_pb2.State(state=rs_state.tolist()) if not self.client.set_state_msg(state_msg): raise RobotServerError("set_state") # Get Robot Server state rs_state = copy.deepcopy( np.nan_to_num(np.array(self.client.get_state_msg().state))) # Check if the length of the Robot Server state received is correct if not len(rs_state) == self._get_robot_server_state_len(): raise InvalidStateError( "Robot Server state received has wrong length") # Convert the initial state from Robot Server format to environment format self.state = self._robot_server_state_to_env_state(rs_state) # Check if the environment state is contained in the observation space if not self.observation_space.contains(self.state): raise InvalidStateError() # check if current position is in the range of the initial joint positions if (len(self.last_position_on_success) == 0) or (type == 'random'): joint_positions = self.ur5._ros_joint_list_to_ur5_joint_list( rs_state[6:12]) tolerance = 0.1 for joint in range(len(joint_positions)): if (joint_positions[joint] + tolerance < self.initial_joint_positions_low[joint]) or ( joint_positions[joint] - tolerance > self.initial_joint_positions_high[joint]): raise InvalidStateError( 'Reset joint positions are not within defined range') # go one empty action and check if there is a collision action = self.state[3:3 + len(self.action_space.sample())] _, _, done, info = self.step(action) self.elapsed_steps = 0 if done: raise InvalidStateError('Reset started in a collision state') return self.state
def reset(self, joint_positions=JOINT_POSITIONS, ee_target_pose=None, randomize_start=False) -> np.array: """Environment reset. Args: joint_positions (list[6] or np.array[6]): robot joint positions in radians. ee_target_pose (list[6] or np.array[6]): [x,y,z,r,p,y] target end effector pose. randomize_start (bool): if True the starting position is randomized defined by the RANDOM_JOINT_OFFSET """ if joint_positions: assert len(joint_positions) == 6 else: joint_positions = JOINT_POSITIONS self.elapsed_steps = 0 # Initialize environment state state_len = self.observation_space.shape[0] state = np.zeros(state_len) rs_state = dict.fromkeys(self.get_robot_server_composition(), 0.0) # Randomize initial robot joint positions if randomize_start: joint_positions_low = np.array(joint_positions) - np.array( RANDOM_JOINT_OFFSET) joint_positions_high = np.array(joint_positions) + np.array( RANDOM_JOINT_OFFSET) joint_positions = np.random.default_rng().uniform( low=joint_positions_low, high=joint_positions_high) # Set initial robot joint positions self._set_joint_positions(joint_positions) # Update joint positions in rs_state rs_state.update(self.joint_positions) # Set target End Effector pose if ee_target_pose: assert len(ee_target_pose) == 6 else: ee_target_pose = self._get_target_pose() # Set initial state of the Robot Server state_msg = self._set_initial_robot_server_state( rs_state, ee_target_pose) if not self.client.set_state_msg(state_msg): raise RobotServerError("set_state") # Get Robot Server state rs_state = self.client.get_state_msg().state_dict # Check if the length and keys of the Robot Server state received is correct self._check_rs_state_keys(rs_state) # Convert the initial state from Robot Server format to environment format state = self._robot_server_state_to_env_state(rs_state) # Check if the environment state is contained in the observation space if not self.observation_space.contains(state): raise InvalidStateError() # Check if current position is in the range of the initial joint positions for joint in self.joint_positions.keys(): if not np.isclose( self.joint_positions[joint], rs_state[joint], atol=0.05): raise InvalidStateError( 'Reset joint positions are not within defined range') return state