def test_holonomic_diagonal_up_movement(self): radius = 1 time_step = .1 state = ObservableState(0, 0, 0, 0, radius) action = ActionXY(np.sqrt(2), np.sqrt(2)) next_state = propagate(state, action, time_step=time_step, kinematics='holonomic') expected_state = ObservableState(time_step * np.sqrt(2), time_step * np.sqrt(2), action.vx, action.vy, radius) self.assertEqual(next_state, expected_state)
def state_callback(self, observe_info): robot_state = observe_info.robot_state robot_full_state = FullState(robot_state.pos_x, robot_state.pos_y, robot_state.vel_x, robot_state.vel_y, robot_state.radius, robot_state.goal_x, robot_state.goal_y, robot_state.vmax, robot_state.theta) peds_full_state = [ ObservableState(ped_state.pos_x, ped_state.pos_y, ped_state.vel_x, ped_state.vel_y, ped_state.radius) for ped_state in observe_info.ped_states ] observable_states = peds_full_state self.cur_state = JointState(robot_full_state, observable_states) action_cmd = ActionCmd() dis = np.sqrt((robot_full_state.px - robot_full_state.gx)**2 + (robot_full_state.py - robot_full_state.gy)**2) if dis < 0.3: action_cmd.stop = True action_cmd.vel_x = 0.0 action_cmd.vel_y = 0.0 else: print("state callback") action_cmd.stop = False robot_action, robot_action_index = self.robot_policy.predict( self.cur_state) print('robot_action', robot_action.v, robot_action.r) action_cmd.vel_x = robot_action.v action_cmd.vel_y = robot_action.r self.robot_action_pub.publish(action_cmd)
def test_overlap_no_movement(self): cell_num = 4 cell_size = 1 om_channel_size = 1 human = Human() human.set(px=0, py=0, vx=0, vy=0, gx=0, gy=0, theta=0) other_agents = np.zeros((1, 4)) occupancy_map = build_occupancy_map(human, other_agents, cell_num, cell_size, om_channel_size) radius = 1 state = ObservableState(0, 0, 0, 0, radius) action = ActionXY(0, 0) next_state = propagate(state, action, time_step=.1, kinematics='holonomic') next_occupancy_map = propagate_occupancy_map(occupancy_map, state, action, .1, 'holonomic', cell_num, cell_size, om_channel_size) expected_next_occupancy_map = build_occupancy_map( next_state, other_agents, cell_num, cell_size, om_channel_size) self.assertTrue( np.array_equal(next_occupancy_map, expected_next_occupancy_map))
def __init__(self, robot_data): self.robot_radius = robot_data['radius'] self.robot_pref_speed = robot_data[ 'pref_speed'] # TODO: Make this a dynamic variable self.robot_mpc = robot_data['mpc'] if self.robot_mpc: self.dt = 0.1 self.times_steps = int(1.0 / self.dt) self.mpc = traj_opt(dt=self.dt, time_steps=self.times_steps) self.mpc_state = ModelState() self.mpc_psi = None self.stop_moving_flag = False self.state = ModelState() self.STATE_SET_FLAG = False self.goal = PoseStamped() self.GOAL_RECEIVED_FLAG = False self.GOAL_THRESH = 0.5 # External Agent(s) state self.other_agents_state = [ ObservableState(float("inf"), float("inf"), 0, 0, 0.3) ] self.OBS_RECEIVED_FLAG = False # what we use to send commands self.desired_action = ActionXY(0, 0)
def propagate(self, state, action): if isinstance(state, ObservableState): # propagate state of humans next_px = state.px + action.vx * self.time_step next_py = state.py + action.vy * self.time_step next_state = ObservableState(next_px, next_py, action.vx, action.vy, state.radius) elif isinstance(state, FullState): # propagate state of current agent # perform action without rotation if self.kinematics == 'holonomic': next_px = state.px + action.vx * self.time_step next_py = state.py + action.vy * self.time_step next_state = FullState(next_px, next_py, action.vx, action.vy, state.radius, state.gx, state.gy, state.v_pref, state.theta) else: next_theta = state.theta + action.r next_vx = action.v * np.cos(next_theta) next_vy = action.v * np.sin(next_theta) next_px = state.px + next_vx * self.time_step next_py = state.py + next_vy * self.time_step next_state = FullState(next_px, next_py, next_vx, next_vy, state.radius, state.gx, state.gy, state.v_pref, next_theta) else: raise ValueError('Type error') return next_state
def cal_obs_pos_lookahead(self, next_state, input_anchor): anch = [Point(-3, -4.5), Point(-3, 4.5), Point(3, 4.5), Point(3, -4.5)] rob = Circle([next_state.px, next_state.py], 1.5) #position x,y line = Segment(Point(next_state.px, next_state.py), anch[input_anchor]) block = intersection(rob, line) obs = ObservableState(float(block[0].coordinates[0]), float(block[0].coordinates[1]), next_state.vx, next_state.vy, 0.25) return obs
def test_no_movement(self): radius = 1 state = ObservableState(0, 0, 0, 0, radius) action = ActionXY(0, 0) next_state = propagate(state, action, time_step=.1, kinematics='holonomic') self.assertEqual(next_state, state)
def planner(self): # update robot robot.set(self.px, self.py, self.gx, self.gy, self.vx, self.vy, self.theta) dist_to_goal = np.linalg.norm( np.array(robot.get_position()) - np.array(robot.get_goal_position())) # compute command velocity if robot.reached_destination(): self.cmd_vel.linear.x = 0 self.cmd_vel.linear.y = 0 self.cmd_vel.linear.z = 0 self.cmd_vel.angular.x = 0 self.cmd_vel.angular.y = 0 self.cmd_vel.angular.z = 0 self.Is_goal_reached = True else: """ self state: FullState(px, py, vx, vy, radius, gx, gy, v_pref, theta) ob:[ObservableState(px1, py1, vx1, vy1, radius1), ObservableState(px1, py1, vx1, vy1, radius1), ....... ObservableState(pxn, pyn, vxn, vyn, radiusn)] """ if len(self.ob) == 0: self.ob = [ ObservableState(FAKE_HUMAN_PX, FAKE_HUMAN_PY, 0, 0, HUMAN_RADIUS) ] self.state = JointState(robot.get_full_state(), self.ob) action = policy.predict(self.state) # max_action self.cmd_vel.linear.x = action.v self.cmd_vel.linear.y = 0 self.cmd_vel.linear.z = 0 self.cmd_vel.angular.x = 0 self.cmd_vel.angular.y = 0 self.cmd_vel.angular.z = action.r ########### for debug ########## # dist_to_goal = np.linalg.norm(np.array(robot.get_position()) - np.array(robot.get_goal_position())) # if self.plan_counter % 10 == 0: # rospy.loginfo("robot position:(%s,%s)" % (self.px, self.py)) # rospy.loginfo("Distance to goal is %s" % dist_to_goal) # rospy.loginfo("self state:\n %s" % self.state.self_state) # for i in range(len(self.state.human_states)): # rospy.loginfo("human %s :\n %s" % (i+1, self.state.human_states[i])) # rospy.loginfo("%s-th action is planned: \n v: %s m/s \n r: %s rad/s" # % (self.plan_counter, self.cmd_vel.linear.x, self.cmd_vel.angular.z)) # publish velocity self.cmd_vel_pub.publish(self.cmd_vel) self.plan_counter += 1 self.visualize_action()
def observe(ob): obs = [] for element in ob: if isinstance(element, ObservableState): obs.append(element) elif isinstance(element, Shape): obs.append( ObservableState(element.px, element.py, 0, 0, element.radius)) return obs
def get_next_observable_state(self, action): self.check_validity(action) pos = self.compute_position(action, self.time_step) next_px, next_py = pos if self.kinematics == 'holonomic': next_vx = action.vx next_vy = action.vy else: next_vx = action.v * np.cos(self.theta) next_vy = action.v * np.sin(self.theta) return ObservableState(next_px, next_py, next_vx, next_vy, self.radius)
def other_agents_from_states(states): other_agents_copy = [] num_agents = len(states) for i in range(num_agents): radius = 0.3 # Spheres in gazebo x = states[i][0] y = states[i][1] vx = states[i][2] vy = states[i][3] other_agents_copy.append(ObservableState(x, y, vx, vy, radius)) return other_agents_copy
def compute_coordinate_observation(self, with_fov=False): # Todo: only consider humans in FOV px = self.robot_states[-1].position.x_val py = self.robot_states[-1].position.y_val if len(self.robot_states) == 1: vx = vy = 0 else: # TODO: use kinematics.linear_velocity? vx = self.robot_states[-1].position.x_val - self.robot_states[-2].position.x_val vy = self.robot_states[-1].position.y_val - self.robot_states[-2].position.y_val r = self.robot_radius gx = self.goal_position[0] gy = self.goal_position[1] v_pref = 1 _, _, theta = airsim.to_eularian_angles(self.robot_states[-1].orientation) robot_state = FullState(px, py, vx, vy, r, gx, gy, v_pref, theta) human_states = [] for i in range(self.human_num): if len(self.human_states[i]) == 1: vx = vy = 0 else: vx = (self.human_states[i][-1].position.x_val - self.human_states[i][-2].position.x_val) / self.time_step vy = (self.human_states[i][-1].position.y_val - self.human_states[i][-2].position.y_val) / self.time_step px = self.human_states[i][-1].position.x_val py = self.human_states[i][-1].position.y_val if with_fov: angle = np.arctan2(py - robot_state.py, px - robot_state.px) if abs(angle - robot_state.theta) > self.fov / 2: continue human_state = ObservableState(px, py, vx, vy, self.human_radius) human_states.append(human_state) if not human_states: human_states.append(ObservableState(-6, 0, 0, 0, self.human_radius)) joint_state = JointState(robot_state, human_states) return joint_state
def transform(self, state): """ Take the state passed from agent and transform it to the input of value network :param state: :return: tensor of shape (# of humans, len(state)) """ human_states_in_FOV = [] for human_state in state.human_states: if self.human_state_in_FOV(state.self_state, human_state): human_states_in_FOV.append(human_state) if len(human_states_in_FOV) > 0: state_tensor = torch.cat([ torch.Tensor([state.self_state + human_state]).to(self.device) for human_state in human_states_in_FOV ], dim=0) if self.with_om: occupancy_maps = self.build_occupancy_maps( human_states_in_FOV, state.self_state) state_tensor = torch.cat([ self.rotate(state_tensor), occupancy_maps.to(self.device) ], dim=1) else: state_tensor = self.rotate(state_tensor) else: state_tensor = self.rotate( torch.Tensor([ state.self_state + ObservableState(0, 0, 0, 0, 0) ]).to(self.device)) if self.with_om: occupancy_maps = self.build_occupancy_maps( [ObservableState(0, 0, 0, 0, 0)], state.self_state) state_tensor = torch.cat( [state_tensor, occupancy_maps.to(self.device)], dim=1) return state_tensor
def cb_other_agents(msg): # Create list of HUMANS global other_agents global OTHER_AGENTS_SET other_agents = [] num_agents = len(msg.name) for i in range(num_agents): radius = 0.3 # Spheres in gazebo x = msg.pose[i].position.x y = msg.pose[i].position.y vx = msg.twist[i].linear.x vy = msg.twist[i].linear.y other_agents.append(ObservableState(x, y, vx, vy, radius)) OTHER_AGENTS_SET = True
def propagate(state: State, action: Action, time_step: float, kinematics: str) -> State: if isinstance(state, ObservableState): # propagate state of humans next_px = state.px + action.vx * time_step next_py = state.py + action.vy * time_step next_state = ObservableState(next_px, next_py, action.vx, action.vy, state.radius) elif isinstance(state, FullState): # propagate state of current agent # perform action without rotation if kinematics == 'holonomic': next_px = state.px + action.vx * time_step next_py = state.py + action.vy * time_step next_state = FullState(next_px, next_py, action.vx, action.vy, state.radius, state.gx, state.gy, state.v_pref, state.theta) elif kinematics == 'unicycle': # altered for Turtlebot: next_theta = state.theta + (action.r * time_step) next_vx = action.v * np.cos(next_theta) next_vy = action.v * np.sin(next_theta) if action.r == 0: next_px = state.px + action.v * np.cos(state.theta) * time_step next_py = state.py + action.v * np.sin(state.theta) * time_step else: next_px = state.px + (action.v / action.r) * (np.sin( action.r * time_step + state.theta) - np.sin(state.theta)) next_py = state.py + (action.v / action.r) * (np.cos( state.theta) - np.cos(action.r * time_step + state.theta)) next_state = FullState(next_px, next_py, next_vx, next_vy, state.radius, state.gx, state.gy, state.v_pref, next_theta) else: next_theta = state.theta + action.r next_vx = action.v * np.cos(next_theta) next_vy = action.v * np.sin(next_theta) next_px = state.px + next_vx * time_step next_py = state.py + next_vy * time_step next_state = FullState(next_px, next_py, next_vx, next_vy, state.radius, state.gx, state.gy, state.v_pref, next_theta) else: raise ValueError('Type error') return next_state
def step(self, action): # convert lucia action to IANEnv action ianenv_action = np.array([0., 0., 0.]) # SOADRL - rotation is dtheta # IAN - rotation is dtheta/dt ianenv_action[2] = action.r / self.env._get_dt() # SOADRL - instant rot, then vel # IAN - vel, then rot action_vy = 0. # SOADRL outputs non-holonomic by default ianenv_action[0] = action.v * np.cos(action.r) - action_vy * np.sin( action.r) ianenv_action[1] = action.v * np.sin(action.r) + action_vy * np.cos( action.r) # get obs from IANEnv obs, rew, done, info = self.env.step(ianenv_action) # convert to SOADRL style robot_state = FullState( self.env.iarlenv.rlenv.virtual_peppers[0].pos[0], self.env.iarlenv.rlenv.virtual_peppers[0].pos[1], self.env.iarlenv.rlenv.virtual_peppers[0].vel[0], self.env.iarlenv.rlenv.virtual_peppers[0].vel[1], self.env.iarlenv.rlenv.vp_radii[0], self.env.iarlenv.rlenv.agent_goals[0][0], self.env.iarlenv.rlenv.agent_goals[0][1], self.v_pref, self.env.iarlenv.rlenv.virtual_peppers[0].pos[2], ) humans_state = [ ObservableState( human.pos[0], human.pos[1], human.vel[0], human.vel[1], r, ) for human, r in zip(self.env.iarlenv.rlenv.virtual_peppers[1:], self.env.iarlenv.rlenv.vp_radii[1:]) ] scan = obs[0] # for each angular section we take the min of the returns downsampled_scan = scan.reshape((-1, self.lidar_upsampling)) downsampled_scan = np.min(downsampled_scan, axis=1) self.last_downsampled_scan = downsampled_scan local_map = np.clip(downsampled_scan / self.angular_map_max_range, 0., 1.) obs = (robot_state, humans_state, local_map) return obs, rew, done, info
def cb_real_other_agent(msg): global other_agents global OTHER_AGENTS_SET global other_agent_prev_time_stamp x = msg.pose.position.x y = msg.pose.position.y vx = 0 vy = 0 curr_time_stamp = msg.header.stamp.secs + (msg.header.stamp.nsecs * 1e-9) if (OTHER_AGENTS_SET): vx = (other_agents[0].px - x) / (curr_time_stamp - other_agent_prev_time_stamp) vy = (other_agents[0].py - y) / (curr_time_stamp - other_agent_prev_time_stamp) other_agent_prev_time_stamp = curr_time_stamp other_agents = [] other_agents.append(ObservableState(x, y, vx, vy, 0.3)) OTHER_AGENTS_SET = True
def step(self, action, update=True): """ Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info) """ if self.centralized_planning: agent_states = [human.get_full_state() for human in self.humans] if self.robot.visible: agent_states.append(self.robot.get_full_state()) human_actions = self.centralized_planner.predict( agent_states)[:-1] else: human_actions = self.centralized_planner.predict(agent_states) else: human_actions = [] for human in self.humans: ob = self.compute_observation_for(human) human_actions.append(human.act(ob)) # collision detection dmin = float('inf') collision = False for i, human in enumerate(self.humans): px = human.px - self.robot.px py = human.py - self.robot.py if self.robot.kinematics == 'holonomic': vx = human.vx - action.vx vy = human.vy - action.vy else: vx = human.vx - action.v * np.cos(action.r + self.robot.theta) vy = human.vy - action.v * np.sin(action.r + self.robot.theta) ex = px + vx * self.time_step ey = py + vy * self.time_step # closest distance between boundaries of two agents closest_dist = point_to_segment_dist( px, py, ex, ey, 0, 0) - human.radius - self.robot.radius if closest_dist < 0: collision = True logging.debug( "Collision: distance between robot and p{} is {:.2E} at time {:.2E}" .format(human.id, closest_dist, self.global_time)) break elif closest_dist < dmin: dmin = closest_dist # collision detection between humans human_num = len(self.humans) for i in range(human_num): for j in range(i + 1, human_num): dx = self.humans[i].px - self.humans[j].px dy = self.humans[i].py - self.humans[j].py dist = (dx**2 + dy**2)**( 1 / 2) - self.humans[i].radius - self.humans[j].radius if dist < 0: # detect collision but don't take humans' collision into account logging.debug('Collision happens between humans in step()') # check if reaching the goal end_position = np.array( self.robot.compute_position(action, self.time_step)) reaching_goal = norm( end_position - np.array(self.robot.get_goal_position())) < self.robot.radius if self.global_time >= self.time_limit - 1: reward = 0 done = True info = Timeout() elif collision: reward = self.collision_penalty done = True info = Collision() elif reaching_goal: reward = self.success_reward done = True info = ReachGoal() elif dmin < self.discomfort_dist: # adjust the reward based on FPS reward = (dmin - self.discomfort_dist ) * self.discomfort_penalty_factor * self.time_step done = False info = Discomfort(dmin) else: reward = 0 done = False info = Nothing() if update: # store state, action value and attention weights if hasattr(self.robot.policy, 'action_values'): self.action_values.append(self.robot.policy.action_values) if hasattr(self.robot.policy, 'get_attention_weights'): self.attention_weights.append( self.robot.policy.get_attention_weights()) if hasattr(self.robot.policy, 'get_matrix_A'): self.As.append(self.robot.policy.get_matrix_A()) if hasattr(self.robot.policy, 'get_feat'): self.feats.append(self.robot.policy.get_feat()) if hasattr(self.robot.policy, 'get_X'): self.Xs.append(self.robot.policy.get_X()) if hasattr(self.robot.policy, 'traj'): self.trajs.append(self.robot.policy.get_traj()) # update all agents self.robot.step(action) if self.global_frame == len(self.trajnet_samples): self.global_frame = 0 if self.trajnet: sample = self.trajnet_samples[self.global_frame] for i in range(len(sample)): dist = np.sqrt( np.square(self.robot.px - float(sample[i][1])) + np.square(self.robot.py - float(sample[i][2]))) sample[i].append(dist) sample = sorted(sample, key=itemgetter(3)) sample = sample[:self.human_num - 1] for i in range(human_num): if len(sample) > i: self.humans[i].human_id = sample[i][0] new_position = [ float(sample[i][1]), float(sample[i][2]) ] new_velocity = [(new_position[0] - self.humans[i].px) / self.time_step, (new_position[1] - self.humans[i].py) / self.time_step] self.humans[i].set_position(new_position) self.humans[i].set_velocity(new_velocity) else: self.humans[i].step(human_actions[i]) else: for human, action in zip(self.humans, human_actions): human.step(action) if self.nonstop_human and human.reached_destination(): self.generate_human(human) self.global_frame += 1 self.global_time += self.time_step self.states.append([ self.robot.get_full_state(), [human.get_full_state() for human in self.humans], [human.id for human in self.humans] ]) self.robot_actions.append(action) self.rewards.append(reward) # compute the observation if self.robot.sensor == 'coordinates': ob = self.compute_observation_for(self.robot) elif self.robot.sensor == 'RGB': raise NotImplementedError else: if self.robot.sensor == 'coordinates': if self.trajnet: index = self.global_frame + 1 if index > len(self.trajnet_samples): index = 0 sample = self.trajnet_samples[index] for i in range(len(sample)): dist = np.sqrt( np.square(self.robot.px - float(sample[i][1])) + np.square(self.robot.py - float(sample[i][2]))) sample[i].append(dist) sample = sorted(sample, key=itemgetter(3)) sample = sample[:self.human_num - 1] ob = [] for i in range(human_num): if len(sample) > i: self.humans[i].human_id = sample[i][0] new_position = [ float(sample[i][1]), float(sample[i][2]) ] new_velocity = [ (new_position[0] - self.humans[i].px) / self.time_step, (new_position[1] - self.humans[i].py) / self.time_step ] ob.append( ObservableState(self.humans[i].px, self.humans[i].py, self.humans[i].vx, self.humans[i].vy, self.humans[i].raduys)) else: ob = [ human.get_next_observable_state(action) for human, action in zip(self.humans, human_actions) ] elif self.robot.sensor == 'RGB': raise NotImplementedError return ob, reward, done, info
def get_observable_state(self): return ObservableState(self.px, self.py, self.vx, self.vy, self.radius)
def predict(self, state): """ A base class for all methods that takes pairwise joint state as input to value network. The input to the value network is always of shape (batch_size, # humans, rotated joint state length) """ if self.phase is None or self.device is None: raise AttributeError('Phase, device attributes have to be set!') if self.phase == 'train' and self.epsilon is None: raise AttributeError( 'Epsilon attribute has to be set in training phase') if self.reach_destination(state): return ActionXY( 0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0) if self.action_space is None: self.build_action_space(state.self_state.v_pref) occupancy_maps = None probability = np.random.random() if self.phase == 'train' and probability < self.epsilon: max_action = self.action_space[np.random.choice( len(self.action_space))] else: self.action_values = list() max_value = float('-inf') max_action = None for action in self.action_space: next_self_state = self.propagate(state.self_state, action) if self.query_env: next_human_states, reward, done, info = self.env.onestep_lookahead( action) else: next_human_states = [ self.propagate( human_state, ActionXY(human_state.vx, human_state.vy)) for human_state in state.human_states ] reward = self.compute_reward(next_self_state, next_human_states) if len(next_human_states) == 0: next_human_states = [ObservableState(0, 0, 0, 0, 0)] batch_next_states = torch.cat([ torch.Tensor([next_self_state + next_human_state]).to( self.device) for next_human_state in next_human_states ], dim=0) rotated_batch_input = self.rotate(batch_next_states).unsqueeze( 0) if self.with_om: if occupancy_maps is None: occupancy_maps = self.build_occupancy_maps( next_human_states, state.self_state).unsqueeze(0) rotated_batch_input = torch.cat( [rotated_batch_input, occupancy_maps.to(self.device)], dim=2) # VALUE UPDATE next_state_value = self.model(rotated_batch_input).data.item() value = reward + pow( self.gamma, self.time_step * state.self_state.v_pref) * next_state_value self.action_values.append(value) if value > max_value: max_value = value max_action = action if max_action is None: raise ValueError('Value network is not well trained. ') if self.phase == 'train': self.last_state = self.transform(state) return max_action
def main(): parser = argparse.ArgumentParser('Parse configuration file') parser.add_argument('--env_config', type=str, default='configs/env.config') parser.add_argument('--policy_config', type=str, default='configs/policy.config') parser.add_argument('--policy', type=str, default='orca') parser.add_argument('--model_dir', type=str, default=None) parser.add_argument('--il', default=False, action='store_true') parser.add_argument('--gpu', default=False, action='store_true') parser.add_argument('--visualize', default=True, action='store_true') parser.add_argument('--phase', type=str, default='test') parser.add_argument('--test_case', type=int, default=0) parser.add_argument('--square', default=False, action='store_true') parser.add_argument('--circle', default=False, action='store_true') parser.add_argument('--video_file', type=str, default=None) parser.add_argument('--traj', default=False, action='store_true') parser.add_argument('--plot', default=False, action='store_true') args = parser.parse_args() if args.model_dir is not None: env_config_file = os.path.join(args.model_dir, os.path.basename(args.env_config)) policy_config_file = os.path.join(args.model_dir, os.path.basename(args.policy_config)) if args.il: model_weights = os.path.join(args.model_dir, 'il_model.pth') else: if os.path.exists( os.path.join(args.model_dir, 'resumed_rl_model.pth')): model_weights = os.path.join(args.model_dir, 'resumed_rl_model.pth') else: model_weights = os.path.join(args.model_dir, 'rl_model.pth') else: env_config_file = args.env_config policy_config_file = args.env_config # configure logging and device logging.basicConfig(level=logging.INFO, format='%(asctime)s, %(levelname)s: %(message)s', datefmt="%Y-%m-%d %H:%M:%S") device = torch.device( "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu") logging.info('Using device: %s', device) # configure policy policy = policy_factory[args.policy]() policy_config = configparser.RawConfigParser() policy_config.read(policy_config_file) policy.configure(policy_config) if policy.trainable: if args.model_dir is None: parser.error( 'Trainable policy must be specified with a model weights directory' ) policy.get_model().load_state_dict( torch.load(model_weights, map_location=torch.device('cpu'))) # configure environment env_config = configparser.RawConfigParser() env_config.read(env_config_file) env = gym.make('CrowdSim-v0') env.configure(env_config) if args.square: env.test_sim = 'square_crossing' if args.circle: env.test_sim = 'circle_crossing' robot = Robot(env_config, 'robot') robot.set_policy(policy) env.set_robot(robot) explorer = Explorer(env, robot, device, gamma=0.9) policy.set_phase(args.phase) policy.set_device(device) # set safety space for ORCA in non-cooperative simulation if isinstance(robot.policy, ORCA): if robot.visible: robot.policy.safety_space = 0 else: # because invisible case breaks the reciprocal assumption # adding some safety space improves ORCA performance. Tune this value based on your need. robot.policy.safety_space = 0 logging.info('ORCA agent buffer: %f', robot.policy.safety_space) policy.set_env(env) robot.print_info() if args.visualize: obs = env.reset(args.phase, args.test_case) done = False last_pos = np.array(robot.get_position()) policy_time = 0.0 numFrame = 0 dist = 20.0 obs_flg = 0 sim_time = False while sim_time == False: samples, robot_state, sim_time = pc2obs.pc2obs( voxel_size=voxel_size) t1 = float(sim_time) while (dist > 0.8): #t1 = time.time() env.humans = [] samples, robot_state, sim_time = pc2obs.pc2obs( voxel_size=voxel_size) if type(samples) == type(False): print("Not Connect") continue dist = math.sqrt((GOAL_X - robot_state[0])**2 + (GOAL_Y - robot_state[1])**2) if obs_flg == 0 and dist < 10: os.system('sh ./init.sh') obs_flg = 1 print("dist: {}".format(dist)) # rotate and shift obs position t2 = time.time() yaw = robot_state[2] rot_matrix = np.array([[math.cos(yaw), math.sin(yaw)], [-math.sin(yaw), math.cos(yaw)]]) #samples = np.array([sample + robot_state[0:2] for sample in samples[:,0:2]]) if len(samples) == 1: samples = np.array( [np.dot(rot_matrix, samples[0][0:2]) + robot_state[0:2]]) print(samples) elif len(samples) > 1: samples = np.array([ np.dot(rot_matrix, sample) + robot_state[0:2] for sample in samples[:, 0:2] ]) obs_position_list = samples obs = [] for ob in obs_position_list: human = Human(env.config, 'humans') # px, py, gx, gy, vx, vy, theta human.set(ob[0], ob[1], ob[0], ob[1], 0, 0, 0) env.humans.append(human) # px, py, vx, vy, radius obs.append(ObservableState(ob[0], ob[1], 0, 0, voxel_size / 2)) if len(obs_position_list) == 0: human = Human(env.config, 'humans') # SARL, CADRL human.set(0, -10, 0, -10, 0, 0, 0) # LSTM #human.set(robot_state[0]+10, robot_state[1]+10, robot_state[0]+10, robot_state[1]+10 , 0, 0, 0) env.humans.append(human) # SARL, CADRL obs.append(ObservableState(0, -10, 0, 0, voxel_size / 2)) # LSTM #obs.append(ObservableState(robot_state[0]+10, robot_state[1]+10, 0, 0, voxel_size/2)) robot.set_position(robot_state) robot.set_velocity([math.sin(yaw), math.cos(yaw)]) #print(obs) action = robot.act(obs) obs, _, done, info = env.step(action) current_pos = np.array(robot.get_position()) current_vel = np.array(robot.get_velocity()) #print("Velocity: {}, {}".format(current_vel[0], current_vel[1])) logging.debug( 'Speed: %.2f', np.linalg.norm(current_pos - last_pos) / robot.time_step) last_pos = current_pos policy_time += time.time() - t1 numFrame += 1 #print(t2-t1, time.time() - t2) diff_angle = (-yaw + math.atan2(current_vel[0], current_vel[1])) if diff_angle > math.pi: diff_angle = diff_angle - 2 * math.pi elif diff_angle < -math.pi: diff_angle = diff_angle + 2 * math.pi #print("diff_angle: {}, {}, {}".format(diff_angle, yaw ,-math.atan2(current_vel[0], current_vel[1]))) if diff_angle < -0.7: direc = 2 # turn left elif diff_angle > 0.7: direc = 3 # turn right else: direc = 1 # go straight # GoEasy(direc) vx = SPEED * math.sqrt(current_vel[0]**2 + current_vel[1]**2) if diff_angle > 0: v_ang = ANGULAR_SPEED * min(diff_angle / (math.pi / 2), 1) else: v_ang = ANGULAR_SPEED * max(diff_angle / (math.pi / 2), -1) print(vx, -v_ang) easyGo.mvCurve(vx, -v_ang) if args.plot: plt.scatter(current_pos[0], current_pos[1], label='robot') plt.arrow(current_pos[0], current_pos[1], current_vel[0], current_vel[1], width=0.05, fc='g', ec='g') plt.arrow(current_pos[0], current_pos[1], math.sin(yaw), math.cos(yaw), width=0.05, fc='r', ec='r') if len(samples) == 1: plt.scatter(samples[0][0], samples[0][1], label='obstacles') elif len(samples) > 1: plt.scatter(samples[:, 0], samples[:, 1], label='obstacles') plt.xlim(-6.5, 6.5) plt.ylim(-9, 4) plt.legend() plt.title("gazebo test") plt.xlabel("x (m)") plt.ylabel("y (m)") plt.pause(0.001) plt.cla() plt.clf() print("NAV TIME {}".format(float(sim_time) - t1)) time.sleep(0.5) print("NAV TIME {}".format(float(sim_time) - t1)) easyGo.stop() plt.close() print("Average took {} sec per iteration, {} Frame".format( policy_time / numFrame, numFrame)) else: explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)