def propagate(self, state, action): if isinstance(state, ObservableState): # propagate state of humans next_px = state.px + action.vx * self.time_step next_py = state.py + action.vy * self.time_step next_state = ObservableState(next_px, next_py, action.vx, action.vy, state.radius) elif isinstance(state, FullState): # propagate state of current agent # perform action without rotation if self.kinematics == 'holonomic': next_px = state.px + action.vx * self.time_step next_py = state.py + action.vy * self.time_step next_state = FullState(next_px, next_py, action.vx, action.vy, state.radius, state.gx, state.gy, state.v_pref, state.theta) else: next_theta = state.theta + action.r next_vx = action.v * np.cos(next_theta) next_vy = action.v * np.sin(next_theta) next_px = state.px + next_vx * self.time_step next_py = state.py + next_vy * self.time_step next_state = FullState(next_px, next_py, next_vx, next_vy, state.radius, state.gx, state.gy, state.v_pref, next_theta) else: raise ValueError('Type error') return next_state
def test_non_holonomic_left_movement(self): radius = 1 time_step = .1 state = FullState(0, 0, 0, 0, radius, 0, 0, 0, 0) r = np.pi action = ActionRot(1., r) next_state = propagate(state, action, time_step=time_step, kinematics='non_holonomic') expected_state = FullState(time_step * np.cos(r), time_step * np.sin(r), \ np.cos(r), np.sin(r), radius, 0, 0, 0, r) self.assertEqual(expected_state, next_state)
def state_callback(self, observe_info): robot_state = observe_info.robot_state robot_full_state = FullState(robot_state.pos_x, robot_state.pos_y, robot_state.vel_x, robot_state.vel_y, robot_state.radius, robot_state.goal_x, robot_state.goal_y, robot_state.vmax, robot_state.theta) peds_full_state = [ ObservableState(ped_state.pos_x, ped_state.pos_y, ped_state.vel_x, ped_state.vel_y, ped_state.radius) for ped_state in observe_info.ped_states ] observable_states = peds_full_state self.cur_state = JointState(robot_full_state, observable_states) action_cmd = ActionCmd() dis = np.sqrt((robot_full_state.px - robot_full_state.gx)**2 + (robot_full_state.py - robot_full_state.gy)**2) if dis < 0.3: action_cmd.stop = True action_cmd.vel_x = 0.0 action_cmd.vel_y = 0.0 else: print("state callback") action_cmd.stop = False robot_action, robot_action_index = self.robot_policy.predict( self.cur_state) print('robot_action', robot_action.v, robot_action.r) action_cmd.vel_x = robot_action.v action_cmd.vel_y = robot_action.r self.robot_action_pub.publish(action_cmd)
def propagate(state: State, action: Action, time_step: float, kinematics: str) -> State: if isinstance(state, ObservableState): # propagate state of humans next_px = state.px + action.vx * time_step next_py = state.py + action.vy * time_step next_state = ObservableState(next_px, next_py, action.vx, action.vy, state.radius) elif isinstance(state, FullState): # propagate state of current agent # perform action without rotation if kinematics == 'holonomic': next_px = state.px + action.vx * time_step next_py = state.py + action.vy * time_step next_state = FullState(next_px, next_py, action.vx, action.vy, state.radius, state.gx, state.gy, state.v_pref, state.theta) elif kinematics == 'unicycle': # altered for Turtlebot: next_theta = state.theta + (action.r * time_step) next_vx = action.v * np.cos(next_theta) next_vy = action.v * np.sin(next_theta) if action.r == 0: next_px = state.px + action.v * np.cos(state.theta) * time_step next_py = state.py + action.v * np.sin(state.theta) * time_step else: next_px = state.px + (action.v / action.r) * (np.sin( action.r * time_step + state.theta) - np.sin(state.theta)) next_py = state.py + (action.v / action.r) * (np.cos( state.theta) - np.cos(action.r * time_step + state.theta)) next_state = FullState(next_px, next_py, next_vx, next_vy, state.radius, state.gx, state.gy, state.v_pref, next_theta) else: next_theta = state.theta + action.r next_vx = action.v * np.cos(next_theta) next_vy = action.v * np.sin(next_theta) next_px = state.px + next_vx * time_step next_py = state.py + next_vy * time_step next_state = FullState(next_px, next_py, next_vx, next_vy, state.radius, state.gx, state.gy, state.v_pref, next_theta) else: raise ValueError('Type error') return next_state
def get_full_state(self): return FullState( self.px, self.py, self.vx, self.vy, self.radius, self.gx, self.gy, self.v_pref, self.theta, )
def step(self, action): # convert lucia action to IANEnv action ianenv_action = np.array([0., 0., 0.]) # SOADRL - rotation is dtheta # IAN - rotation is dtheta/dt ianenv_action[2] = action.r / self.env._get_dt() # SOADRL - instant rot, then vel # IAN - vel, then rot action_vy = 0. # SOADRL outputs non-holonomic by default ianenv_action[0] = action.v * np.cos(action.r) - action_vy * np.sin( action.r) ianenv_action[1] = action.v * np.sin(action.r) + action_vy * np.cos( action.r) # get obs from IANEnv obs, rew, done, info = self.env.step(ianenv_action) # convert to SOADRL style robot_state = FullState( self.env.iarlenv.rlenv.virtual_peppers[0].pos[0], self.env.iarlenv.rlenv.virtual_peppers[0].pos[1], self.env.iarlenv.rlenv.virtual_peppers[0].vel[0], self.env.iarlenv.rlenv.virtual_peppers[0].vel[1], self.env.iarlenv.rlenv.vp_radii[0], self.env.iarlenv.rlenv.agent_goals[0][0], self.env.iarlenv.rlenv.agent_goals[0][1], self.v_pref, self.env.iarlenv.rlenv.virtual_peppers[0].pos[2], ) humans_state = [ ObservableState( human.pos[0], human.pos[1], human.vel[0], human.vel[1], r, ) for human, r in zip(self.env.iarlenv.rlenv.virtual_peppers[1:], self.env.iarlenv.rlenv.vp_radii[1:]) ] scan = obs[0] # for each angular section we take the min of the returns downsampled_scan = scan.reshape((-1, self.lidar_upsampling)) downsampled_scan = np.min(downsampled_scan, axis=1) self.last_downsampled_scan = downsampled_scan local_map = np.clip(downsampled_scan / self.angular_map_max_range, 0., 1.) obs = (robot_state, humans_state, local_map) return obs, rew, done, info
def compute_coordinate_observation(self, with_fov=False): # Todo: only consider humans in FOV px = self.robot_states[-1].position.x_val py = self.robot_states[-1].position.y_val if len(self.robot_states) == 1: vx = vy = 0 else: # TODO: use kinematics.linear_velocity? vx = self.robot_states[-1].position.x_val - self.robot_states[-2].position.x_val vy = self.robot_states[-1].position.y_val - self.robot_states[-2].position.y_val r = self.robot_radius gx = self.goal_position[0] gy = self.goal_position[1] v_pref = 1 _, _, theta = airsim.to_eularian_angles(self.robot_states[-1].orientation) robot_state = FullState(px, py, vx, vy, r, gx, gy, v_pref, theta) human_states = [] for i in range(self.human_num): if len(self.human_states[i]) == 1: vx = vy = 0 else: vx = (self.human_states[i][-1].position.x_val - self.human_states[i][-2].position.x_val) / self.time_step vy = (self.human_states[i][-1].position.y_val - self.human_states[i][-2].position.y_val) / self.time_step px = self.human_states[i][-1].position.x_val py = self.human_states[i][-1].position.y_val if with_fov: angle = np.arctan2(py - robot_state.py, px - robot_state.px) if abs(angle - robot_state.theta) > self.fov / 2: continue human_state = ObservableState(px, py, vx, vy, self.human_radius) human_states.append(human_state) if not human_states: human_states.append(ObservableState(-6, 0, 0, 0, self.human_radius)) joint_state = JointState(robot_state, human_states) return joint_state
def step(self, action, update=True, rebuild=True): """ Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info) """ human_actions = [] for human in self.humans: # observation for humans is always coordinates ob = [other_human.get_observable_state() for other_human in self.humans if other_human != human] if self.robot.visible: ob += [self.robot.get_observable_state()] human_actions.append(human.act(ob)) # collision detection dmin = float('inf') collision = False for i, human in enumerate(self.humans): px = human.px - self.robot.px py = human.py - self.robot.py if self.robot.kinematics == 'holonomic': vx = human.vx - action.vx vy = human.vy - action.vy else: vx = human.vx - action.v * np.cos(action.r + self.robot.theta) vy = human.vy - action.v * np.sin(action.r + self.robot.theta) ex = px + vx * self.time_step ey = py + vy * self.time_step # closest distance between boundaries of two agents closest_dist = point_to_segment_dist(px, py, ex, ey, 0, 0) - human.radius - self.robot.radius if closest_dist < 0: collision = True # logging.debug("Collision: distance between robot and p{} is {:.2E}".format(i, closest_dist)) break elif closest_dist < dmin: dmin = closest_dist # begin = time() self.map.build_map_cpu(self.humans, rebuild) # if not update: # print("rebuild: ", rebuild, "build map time: ", 81 * (time() - begin)) agent_fullstate = FullState(self.robot.px + action.vx * self.agent_timestep, self.robot.py + action.vy * self.agent_timestep, self.robot.vx, self.robot.vy, self.robot.radius, self.robot.gx, self.robot.gy, self.robot.v_pref, self.robot.theta) # begin = time() collision_probabbility = self.map.compute_occupied_probability(agent_fullstate) # if not update: # print("rebuild: ", rebuild, "build map time: ", 81 * (time() - begin)) stationary_dist = ((self.robot.py + self.circle_radius)**2 + (self.robot.px**2))**(1 / 2) # collision detection between humans human_num = len(self.humans) for i in range(human_num): for j in range(i + 1, human_num): dx = self.humans[i].px - self.humans[j].px dy = self.humans[i].py - self.humans[j].py dist = (dx**2 + dy**2)**(1 / 2) - self.humans[i].radius - self.humans[j].radius if dist < 0: # detect collision but don't take humans' collision into account logging.debug('Collision happens between humans in step()') # check if reaching the goal end_position = np.array(self.robot.compute_position(action, self.time_step)) reaching_goal = norm(end_position - np.array(self.robot.get_goal_position())) < self.robot.radius stationary_state = False if self.robot.kinematics == 'holonomic': if abs(action.vx) <= 0.0001 and abs(action.vy) <= 0.0001: stationary_state = True else: if abs(action.v) <= 0.0001: stationary_state = True if self.global_time >= self.time_limit - 1: reward = 0 done = True info = Timeout() elif collision: reward = self.collision_penalty done = True info = Collision() elif reaching_goal: reward = self.success_reward done = True info = ReachGoal() else: reward = -0.25 * collision_probabbility * self.reward_increment done = False if dmin < self.discomfort_dist: info = Danger(dmin) else: info = Nothing() if update: # store state, action value and attention weights self.states.append([self.robot.get_full_state(), [human.get_full_state() for human in self.humans]]) if hasattr(self.robot.policy, 'action_values'): self.action_values.append(self.robot.policy.action_values) if hasattr(self.robot.policy, 'get_attention_weights'): self.attention_weights.append(self.robot.policy.get_attention_weights()) # update all agents self.robot.step(action) for i, human_action in enumerate(human_actions): self.humans[i].step(human_action) self.global_time += self.time_step for i, human in enumerate(self.humans): # only record the first time the human reaches the goal if self.human_times[i] == 0 and human.reached_destination(): self.human_times[i] = self.global_time # compute the observation if self.robot.sensor == 'coordinates': ob = [human.get_observable_state() for human in self.humans] elif self.robot.sensor == 'RGB': raise NotImplementedError else: if self.robot.sensor == 'coordinates': ob = [human.get_next_observable_state(action) for human, action in zip(self.humans, human_actions)] elif self.robot.sensor == 'RGB': raise NotImplementedError # self.agent_prev_vx = action.vx # self.agent_prev_vy = action.vy if self.robot.kinematics == 'holonomic': self.agent_prev_vx = action.vx self.agent_prev_vy = action.vy else: self.agent_prev_vx = action.v * np.cos(action.r + self.robot.theta) self.agent_prev_vy = action.v * np.sin(action.r + self.robot.theta) return ob, reward, done, info