Example #1
 def test_holonomic_diagonal_up_movement(self):
     radius = 1
     time_step = .1
     state = ObservableState(0, 0, 0, 0, radius)
     action = ActionXY(np.sqrt(2), np.sqrt(2))
     next_state = propagate(state,
                            action,
                            time_step=time_step,
                            kinematics='holonomic')
     expected_state = ObservableState(time_step * np.sqrt(2),
                                      time_step * np.sqrt(2), action.vx,
                                      action.vy, radius)
     self.assertEqual(next_state, expected_state)
Example #2
    def state_callback(self, observe_info):
        robot_state = observe_info.robot_state
        robot_full_state = FullState(robot_state.pos_x, robot_state.pos_y,
                                     robot_state.vel_x, robot_state.vel_y,
                                     robot_state.radius, robot_state.goal_x,
                                     robot_state.goal_y, robot_state.vmax,
                                     robot_state.theta)
        observable_states = [
            ObservableState(ped_state.pos_x, ped_state.pos_y, ped_state.vel_x,
                            ped_state.vel_y, ped_state.radius)
            for ped_state in observe_info.ped_states
        ]
        self.cur_state = JointState(robot_full_state, observable_states)
        action_cmd = ActionCmd()

        dis = np.sqrt((robot_full_state.px - robot_full_state.gx)**2 +
                      (robot_full_state.py - robot_full_state.gy)**2)
        if dis < 0.3:
            action_cmd.stop = True
            action_cmd.vel_x = 0.0
            action_cmd.vel_y = 0.0
        else:
            print("state callback")
            action_cmd.stop = False
            robot_action, robot_action_index = self.robot_policy.predict(
                self.cur_state)
            print('robot_action', robot_action.v, robot_action.r)
            action_cmd.vel_x = robot_action.v
            action_cmd.vel_y = robot_action.r
        self.robot_action_pub.publish(action_cmd)
Example #3
    def test_overlap_no_movement(self):
        cell_num = 4
        cell_size = 1
        om_channel_size = 1
        human = Human()
        human.set(px=0, py=0, vx=0, vy=0, gx=0, gy=0, theta=0)
        other_agents = np.zeros((1, 4))
        occupancy_map = build_occupancy_map(human, other_agents, cell_num,
                                            cell_size, om_channel_size)

        radius = 1
        state = ObservableState(0, 0, 0, 0, radius)
        action = ActionXY(0, 0)
        next_state = propagate(state,
                               action,
                               time_step=.1,
                               kinematics='holonomic')
        next_occupancy_map = propagate_occupancy_map(occupancy_map, state,
                                                     action, .1, 'holonomic',
                                                     cell_num, cell_size,
                                                     om_channel_size)
        expected_next_occupancy_map = build_occupancy_map(
            next_state, other_agents, cell_num, cell_size, om_channel_size)

        self.assertTrue(
            np.array_equal(next_occupancy_map, expected_next_occupancy_map))
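For intuition about what these occupancy-map tests compare, here is a simplified, axis-aligned sketch of the idea: flag which cell of a grid centred on the agent each other agent falls into. This is only an illustration; the project's build_occupancy_map presumably also rotates into the agent's frame and supports velocity channels (om_channel_size).

import numpy as np

def simple_occupancy_map(center_xy, others_xy, cell_num, cell_size):
    # Flag which cell of a cell_num x cell_num grid centred on the agent
    # each other agent occupies (axis-aligned, single channel).
    om = np.zeros(cell_num * cell_num)
    for ox, oy in others_xy:
        ix = int((ox - center_xy[0]) / cell_size + cell_num / 2)
        iy = int((oy - center_xy[1]) / cell_size + cell_num / 2)
        if 0 <= ix < cell_num and 0 <= iy < cell_num:
            om[iy * cell_num + ix] = 1
    return om

print(simple_occupancy_map((0, 0), [(0.4, -0.2)], cell_num=4, cell_size=1))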
Example #4
    def __init__(self, robot_data):

        self.robot_radius = robot_data['radius']
        self.robot_pref_speed = robot_data[
            'pref_speed']  # TODO: Make this a dynamic variable
        self.robot_mpc = robot_data['mpc']
        if self.robot_mpc:
            self.dt = 0.1
            self.time_steps = int(1.0 / self.dt)
            self.mpc = traj_opt(dt=self.dt, time_steps=self.time_steps)
            self.mpc_state = ModelState()
            self.mpc_psi = None

        self.stop_moving_flag = False
        self.state = ModelState()
        self.STATE_SET_FLAG = False
        self.goal = PoseStamped()
        self.GOAL_RECEIVED_FLAG = False
        self.GOAL_THRESH = 0.5

        # External Agent(s) state
        self.other_agents_state = [
            ObservableState(float("inf"), float("inf"), 0, 0, 0.3)
        ]
        self.OBS_RECEIVED_FLAG = False

        # what we use to send commands
        self.desired_action = ActionXY(0, 0)
Example #5
    def propagate(self, state, action):
        if isinstance(state, ObservableState):
            # propagate state of humans
            next_px = state.px + action.vx * self.time_step
            next_py = state.py + action.vy * self.time_step
            next_state = ObservableState(next_px, next_py, action.vx,
                                         action.vy, state.radius)
        elif isinstance(state, FullState):
            # propagate state of current agent
            # perform action without rotation
            if self.kinematics == 'holonomic':
                next_px = state.px + action.vx * self.time_step
                next_py = state.py + action.vy * self.time_step
                next_state = FullState(next_px, next_py, action.vx, action.vy,
                                       state.radius, state.gx, state.gy,
                                       state.v_pref, state.theta)
            else:
                next_theta = state.theta + action.r
                next_vx = action.v * np.cos(next_theta)
                next_vy = action.v * np.sin(next_theta)
                next_px = state.px + next_vx * self.time_step
                next_py = state.py + next_vy * self.time_step
                next_state = FullState(next_px, next_py, next_vx, next_vy,
                                       state.radius, state.gx, state.gy,
                                       state.v_pref, next_theta)
        else:
            raise ValueError('Type error')

        return next_state
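To make the non-holonomic branch concrete, here is one step worked out with minimal stand-in types (not the project's FullState/ActionRot, just the fields the update touches):

from collections import namedtuple

import numpy as np

# Stand-ins for illustration only.
MiniState = namedtuple('MiniState', ['px', 'py', 'theta'])
MiniActionRot = namedtuple('MiniActionRot', ['v', 'r'])

def unicycle_step(state, action, time_step):
    # Same update as the non-holonomic branch above: rotate first, then move.
    next_theta = state.theta + action.r
    next_vx = action.v * np.cos(next_theta)
    next_vy = action.v * np.sin(next_theta)
    return MiniState(state.px + next_vx * time_step,
                     state.py + next_vy * time_step, next_theta)

print(unicycle_step(MiniState(0.0, 0.0, 0.0), MiniActionRot(1.0, np.pi / 2), 0.25))
# -> px ~ 0, py = 0.25, theta = pi/2: turn 90 degrees, then move 0.25 m along +y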
Example #6
 def cal_obs_pos_lookahead(self, next_state, input_anchor):
     anch = [Point(-3, -4.5), Point(-3, 4.5), Point(3, 4.5), Point(3, -4.5)]
     rob = Circle([next_state.px, next_state.py], 1.5)  # circle of radius 1.5 centred on the robot position (x, y)
     line = Segment(Point(next_state.px, next_state.py), anch[input_anchor])
     block = intersection(rob, line)
     obs = ObservableState(float(block[0].coordinates[0]),
                           float(block[0].coordinates[1]), next_state.vx,
                           next_state.vy, 0.25)
     return obs
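The Point/Segment/Circle/intersection primitives used here look like sympy.geometry (an assumption about this example's imports). A tiny standalone check of the circle-segment intersection pattern:

from sympy import Circle, Point, Segment, intersection

rob = Circle(Point(0, 0), 1.5)               # lookahead circle around the robot
line = Segment(Point(0, 0), Point(3, -4.5))  # segment toward one anchor
block = intersection(rob, line)              # list of sympy Point2D objects
print(float(block[0].coordinates[0]), float(block[0].coordinates[1]))
# -> roughly (0.832, -1.248), the point 1.5 m along the segment from the robot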
Example #7
 def test_no_movement(self):
     radius = 1
     state = ObservableState(0, 0, 0, 0, radius)
     action = ActionXY(0, 0)
     next_state = propagate(state,
                            action,
                            time_step=.1,
                            kinematics='holonomic')
     self.assertEqual(next_state, state)
Example #8
    def planner(self):
        # update robot
        robot.set(self.px, self.py, self.gx, self.gy, self.vx, self.vy,
                  self.theta)
        dist_to_goal = np.linalg.norm(
            np.array(robot.get_position()) -
            np.array(robot.get_goal_position()))

        # compute command velocity
        if robot.reached_destination():
            self.cmd_vel.linear.x = 0
            self.cmd_vel.linear.y = 0
            self.cmd_vel.linear.z = 0
            self.cmd_vel.angular.x = 0
            self.cmd_vel.angular.y = 0
            self.cmd_vel.angular.z = 0
            self.Is_goal_reached = True

        else:
            """
            self state: FullState(px, py, vx, vy, radius, gx, gy, v_pref, theta)
            ob:[ObservableState(px1, py1, vx1, vy1, radius1),
                ObservableState(px1, py1, vx1, vy1, radius1),
                   .......                    
                ObservableState(pxn, pyn, vxn, vyn, radiusn)]
            """
            if len(self.ob) == 0:
                self.ob = [
                    ObservableState(FAKE_HUMAN_PX, FAKE_HUMAN_PY, 0, 0,
                                    HUMAN_RADIUS)
                ]

            self.state = JointState(robot.get_full_state(), self.ob)
            action = policy.predict(self.state)  # max_action
            self.cmd_vel.linear.x = action.v
            self.cmd_vel.linear.y = 0
            self.cmd_vel.linear.z = 0
            self.cmd_vel.angular.x = 0
            self.cmd_vel.angular.y = 0
            self.cmd_vel.angular.z = action.r

        ########### for debug ##########
        # dist_to_goal = np.linalg.norm(np.array(robot.get_position()) - np.array(robot.get_goal_position()))
        # if self.plan_counter % 10 == 0:
        #     rospy.loginfo("robot position:(%s,%s)" % (self.px, self.py))
        #     rospy.loginfo("Distance to goal is %s" % dist_to_goal)
        #     rospy.loginfo("self state:\n %s" % self.state.self_state)
        #     for i in range(len(self.state.human_states)):
        #         rospy.loginfo("human %s :\n %s" % (i+1, self.state.human_states[i]))
        #     rospy.loginfo("%s-th action is planned: \n v: %s m/s \n r: %s rad/s"
        #                   % (self.plan_counter, self.cmd_vel.linear.x, self.cmd_vel.angular.z))

        # publish velocity
        self.cmd_vel_pub.publish(self.cmd_vel)
        self.plan_counter += 1
        self.visualize_action()
Example #9
    def observe(ob):
        obs = []
        for element in ob:
            if isinstance(element, ObservableState):
                obs.append(element)
            elif isinstance(element, Shape):
                obs.append(
                    ObservableState(element.px, element.py, 0, 0,
                                    element.radius))

        return obs
Example #10
 def get_next_observable_state(self, action):
     self.check_validity(action)
     pos = self.compute_position(action, self.time_step)
     next_px, next_py = pos
     if self.kinematics == 'holonomic':
         next_vx = action.vx
         next_vy = action.vy
     else:
         next_vx = action.v * np.cos(self.theta)
         next_vy = action.v * np.sin(self.theta)
     return ObservableState(next_px, next_py, next_vx, next_vy, self.radius)
Example #11
def other_agents_from_states(states):
    other_agents_copy = []
    num_agents = len(states)
    for i in range(num_agents):
        radius = 0.3  # Spheres in gazebo
        x = states[i][0]
        y = states[i][1]
        vx = states[i][2]
        vy = states[i][3]
        other_agents_copy.append(ObservableState(x, y, vx, vy, radius))
    return other_agents_copy
Example #12
    def compute_coordinate_observation(self, with_fov=False):
        # Todo: only consider humans in FOV
        px = self.robot_states[-1].position.x_val
        py = self.robot_states[-1].position.y_val
        if len(self.robot_states) == 1:
            vx = vy = 0
        else:
            # TODO: use kinematics.linear_velocity?
            vx = (self.robot_states[-1].position.x_val - self.robot_states[-2].position.x_val) / self.time_step
            vy = (self.robot_states[-1].position.y_val - self.robot_states[-2].position.y_val) / self.time_step
        r = self.robot_radius
        gx = self.goal_position[0]
        gy = self.goal_position[1]
        v_pref = 1
        _, _, theta = airsim.to_eularian_angles(self.robot_states[-1].orientation)
        robot_state = FullState(px, py, vx, vy, r, gx, gy, v_pref, theta)

        human_states = []
        for i in range(self.human_num):
            if len(self.human_states[i]) == 1:
                vx = vy = 0
            else:
                vx = (self.human_states[i][-1].position.x_val - self.human_states[i][-2].position.x_val) / self.time_step
                vy = (self.human_states[i][-1].position.y_val - self.human_states[i][-2].position.y_val) / self.time_step
            px = self.human_states[i][-1].position.x_val
            py = self.human_states[i][-1].position.y_val

            if with_fov:
                angle = np.arctan2(py - robot_state.py, px - robot_state.px)
                if abs(angle - robot_state.theta) > self.fov / 2:
                    continue

            human_state = ObservableState(px, py, vx, vy, self.human_radius)
            human_states.append(human_state)

        if not human_states:
            human_states.append(ObservableState(-6, 0, 0, 0, self.human_radius))

        joint_state = JointState(robot_state, human_states)

        return joint_state
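One caveat with the FOV test above: abs(angle - robot_state.theta) is not wrapped, so a human just across the +/-pi boundary can be rejected even though it is nearly straight ahead. A wrapped version of the same check would look like this (a sketch, not the project's code):

import numpy as np

def in_fov(px, py, robot_px, robot_py, robot_theta, fov):
    # Bearing from the robot to the human, relative to the robot's heading,
    # wrapped into [-pi, pi) before comparing against half the field of view.
    angle = np.arctan2(py - robot_py, px - robot_px)
    diff = (angle - robot_theta + np.pi) % (2 * np.pi) - np.pi
    return abs(diff) <= fov / 2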
Example #13
    def transform(self, state):
        """
        Take the state passed from agent and transform it to the input of value network

        :param state:
        :return: tensor of shape (# of humans, len(state))
        """
        human_states_in_FOV = []
        for human_state in state.human_states:
            if self.human_state_in_FOV(state.self_state, human_state):
                human_states_in_FOV.append(human_state)
        if len(human_states_in_FOV) > 0:
            state_tensor = torch.cat([
                torch.Tensor([state.self_state + human_state]).to(self.device)
                for human_state in human_states_in_FOV
            ],
                                     dim=0)
            if self.with_om:
                occupancy_maps = self.build_occupancy_maps(
                    human_states_in_FOV, state.self_state)
                state_tensor = torch.cat([
                    self.rotate(state_tensor),
                    occupancy_maps.to(self.device)
                ],
                                         dim=1)
            else:
                state_tensor = self.rotate(state_tensor)
        else:
            state_tensor = self.rotate(
                torch.Tensor([
                    state.self_state + ObservableState(0, 0, 0, 0, 0)
                ]).to(self.device))
            if self.with_om:
                occupancy_maps = self.build_occupancy_maps(
                    [ObservableState(0, 0, 0, 0, 0)], state.self_state)
                state_tensor = torch.cat(
                    [state_tensor,
                     occupancy_maps.to(self.device)], dim=1)

        return state_tensor
Example #14
def cb_other_agents(msg):
    # Create list of HUMANS
    global other_agents
    global OTHER_AGENTS_SET
    other_agents = []
    num_agents = len(msg.name)
    for i in range(num_agents):
        radius = 0.3  # Spheres in gazebo
        x = msg.pose[i].position.x
        y = msg.pose[i].position.y
        vx = msg.twist[i].linear.x
        vy = msg.twist[i].linear.y
        other_agents.append(ObservableState(x, y, vx, vy, radius))
    OTHER_AGENTS_SET = True
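A hedged sketch of how a callback like this might be wired up: /gazebo/model_states is the standard topic on which gazebo_ros publishes gazebo_msgs/ModelStates, whose name/pose/twist fields match what the callback reads; the node name is made up.

import rospy
from gazebo_msgs.msg import ModelStates

rospy.init_node('other_agents_listener')  # hypothetical node name
rospy.Subscriber('/gazebo/model_states', ModelStates, cb_other_agents)
rospy.spin()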
Example #15
def propagate(state: State, action: Action, time_step: float,
              kinematics: str) -> State:
    if isinstance(state, ObservableState):
        # propagate state of humans
        next_px = state.px + action.vx * time_step
        next_py = state.py + action.vy * time_step
        next_state = ObservableState(next_px, next_py, action.vx, action.vy,
                                     state.radius)
    elif isinstance(state, FullState):
        # propagate state of current agent
        # perform action without rotation
        if kinematics == 'holonomic':
            next_px = state.px + action.vx * time_step
            next_py = state.py + action.vy * time_step
            next_state = FullState(next_px, next_py, action.vx, action.vy,
                                   state.radius, state.gx, state.gy,
                                   state.v_pref, state.theta)
        elif kinematics == 'unicycle':
            # altered for Turtlebot:
            next_theta = state.theta + (action.r * time_step)
            next_vx = action.v * np.cos(next_theta)
            next_vy = action.v * np.sin(next_theta)
            if action.r == 0:
                next_px = state.px + action.v * np.cos(state.theta) * time_step
                next_py = state.py + action.v * np.sin(state.theta) * time_step
            else:
                next_px = state.px + (action.v / action.r) * (np.sin(
                    action.r * time_step + state.theta) - np.sin(state.theta))
                next_py = state.py + (action.v / action.r) * (np.cos(
                    state.theta) - np.cos(action.r * time_step + state.theta))
            next_state = FullState(next_px, next_py, next_vx, next_vy,
                                   state.radius, state.gx, state.gy,
                                   state.v_pref, next_theta)
        else:
            next_theta = state.theta + action.r
            next_vx = action.v * np.cos(next_theta)
            next_vy = action.v * np.sin(next_theta)
            next_px = state.px + next_vx * time_step
            next_py = state.py + next_vy * time_step
            next_state = FullState(next_px, next_py, next_vx, next_vy,
                                   state.radius, state.gx, state.gy,
                                   state.v_pref, next_theta)
    else:
        raise ValueError('Type error')

    return next_state
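The 'altered for Turtlebot' branch integrates the unicycle model exactly over the step: with constant v and turn rate r the robot traces a circular arc of radius v/r, which is what the sin/cos difference terms compute. A small standalone check that the closed form agrees with a fine Euler integration (a sketch, independent of the project's classes):

import numpy as np

def arc_step(px, py, theta, v, r, dt):
    # Closed-form arc update used above (r != 0).
    return (px + (v / r) * (np.sin(r * dt + theta) - np.sin(theta)),
            py + (v / r) * (np.cos(theta) - np.cos(r * dt + theta)),
            theta + r * dt)

def euler_steps(px, py, theta, v, r, dt, n=10000):
    # The same motion integrated with many tiny Euler steps, for comparison.
    h = dt / n
    for _ in range(n):
        px += v * np.cos(theta) * h
        py += v * np.sin(theta) * h
        theta += r * h
    return px, py, theta

print(arc_step(0.0, 0.0, 0.0, 1.0, 0.5, 0.1))     # (0.09996, 0.00250, 0.05)
print(euler_steps(0.0, 0.0, 0.0, 1.0, 0.5, 0.1))  # agrees to ~1e-6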
Example #16
 def step(self, action):
     # convert lucia action to IANEnv action
     ianenv_action = np.array([0., 0., 0.])
     # SOADRL - rotation is dtheta
     # IAN    - rotation is dtheta/dt
     ianenv_action[2] = action.r / self.env._get_dt()
     #  SOADRL - instant rot, then vel
     #  IAN    - vel, then rot
     action_vy = 0.  # SOADRL outputs non-holonomic by default
     ianenv_action[0] = action.v * np.cos(action.r) - action_vy * np.sin(
         action.r)
     ianenv_action[1] = action.v * np.sin(action.r) + action_vy * np.cos(
         action.r)
     # get obs from IANEnv
     obs, rew, done, info = self.env.step(ianenv_action)
     # convert to SOADRL style
     robot_state = FullState(
         self.env.iarlenv.rlenv.virtual_peppers[0].pos[0],
         self.env.iarlenv.rlenv.virtual_peppers[0].pos[1],
         self.env.iarlenv.rlenv.virtual_peppers[0].vel[0],
         self.env.iarlenv.rlenv.virtual_peppers[0].vel[1],
         self.env.iarlenv.rlenv.vp_radii[0],
         self.env.iarlenv.rlenv.agent_goals[0][0],
         self.env.iarlenv.rlenv.agent_goals[0][1],
         self.v_pref,
         self.env.iarlenv.rlenv.virtual_peppers[0].pos[2],
     )
     humans_state = [
         ObservableState(
             human.pos[0],
             human.pos[1],
             human.vel[0],
             human.vel[1],
             r,
         ) for human, r in zip(self.env.iarlenv.rlenv.virtual_peppers[1:],
                               self.env.iarlenv.rlenv.vp_radii[1:])
     ]
     scan = obs[0]
     # for each angular section we take the min of the returns
     downsampled_scan = scan.reshape((-1, self.lidar_upsampling))
     downsampled_scan = np.min(downsampled_scan, axis=1)
     self.last_downsampled_scan = downsampled_scan
     local_map = np.clip(downsampled_scan / self.angular_map_max_range, 0.,
                         1.)
     obs = (robot_state, humans_state, local_map)
     return obs, rew, done, info
Example #17
def cb_real_other_agent(msg):
    global other_agents
    global OTHER_AGENTS_SET
    global other_agent_prev_time_stamp
    x = msg.pose.position.x
    y = msg.pose.position.y
    vx = 0
    vy = 0
    curr_time_stamp = msg.header.stamp.secs + (msg.header.stamp.nsecs * 1e-9)
    if OTHER_AGENTS_SET:
        dt = curr_time_stamp - other_agent_prev_time_stamp
        # finite-difference velocity: (current - previous) position over elapsed time
        vx = (x - other_agents[0].px) / dt
        vy = (y - other_agents[0].py) / dt
    other_agent_prev_time_stamp = curr_time_stamp
    other_agents = []
    other_agents.append(ObservableState(x, y, vx, vy, 0.3))
    OTHER_AGENTS_SET = True
Example #18
    def step(self, action, update=True):
        """
        Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info)
        """
        if self.centralized_planning:
            agent_states = [human.get_full_state() for human in self.humans]
            if self.robot.visible:
                agent_states.append(self.robot.get_full_state())
                human_actions = self.centralized_planner.predict(
                    agent_states)[:-1]
            else:
                human_actions = self.centralized_planner.predict(agent_states)
        else:
            human_actions = []
            for human in self.humans:
                ob = self.compute_observation_for(human)
                human_actions.append(human.act(ob))

        # collision detection
        dmin = float('inf')
        collision = False
        for i, human in enumerate(self.humans):
            px = human.px - self.robot.px
            py = human.py - self.robot.py
            if self.robot.kinematics == 'holonomic':
                vx = human.vx - action.vx
                vy = human.vy - action.vy
            else:
                vx = human.vx - action.v * np.cos(action.r + self.robot.theta)
                vy = human.vy - action.v * np.sin(action.r + self.robot.theta)
            ex = px + vx * self.time_step
            ey = py + vy * self.time_step
            # closest distance between boundaries of two agents
            closest_dist = point_to_segment_dist(
                px, py, ex, ey, 0, 0) - human.radius - self.robot.radius
            if closest_dist < 0:
                collision = True
                logging.debug(
                    "Collision: distance between robot and p{} is {:.2E} at time {:.2E}"
                    .format(human.id, closest_dist, self.global_time))
                break
            elif closest_dist < dmin:
                dmin = closest_dist

        # collision detection between humans
        human_num = len(self.humans)
        for i in range(human_num):
            for j in range(i + 1, human_num):
                dx = self.humans[i].px - self.humans[j].px
                dy = self.humans[i].py - self.humans[j].py
                dist = (dx**2 + dy**2)**(
                    1 / 2) - self.humans[i].radius - self.humans[j].radius
                if dist < 0:
                    # detect collision but don't take humans' collision into account
                    logging.debug('Collision happens between humans in step()')

        # check if reaching the goal
        end_position = np.array(
            self.robot.compute_position(action, self.time_step))
        reaching_goal = norm(
            end_position -
            np.array(self.robot.get_goal_position())) < self.robot.radius

        if self.global_time >= self.time_limit - 1:
            reward = 0
            done = True
            info = Timeout()
        elif collision:
            reward = self.collision_penalty
            done = True
            info = Collision()
        elif reaching_goal:
            reward = self.success_reward
            done = True
            info = ReachGoal()
        elif dmin < self.discomfort_dist:
            # adjust the reward based on FPS
            reward = (dmin - self.discomfort_dist
                      ) * self.discomfort_penalty_factor * self.time_step
            done = False
            info = Discomfort(dmin)
        else:
            reward = 0
            done = False
            info = Nothing()

        if update:
            # store state, action value and attention weights
            if hasattr(self.robot.policy, 'action_values'):
                self.action_values.append(self.robot.policy.action_values)
            if hasattr(self.robot.policy, 'get_attention_weights'):
                self.attention_weights.append(
                    self.robot.policy.get_attention_weights())
            if hasattr(self.robot.policy, 'get_matrix_A'):
                self.As.append(self.robot.policy.get_matrix_A())
            if hasattr(self.robot.policy, 'get_feat'):
                self.feats.append(self.robot.policy.get_feat())
            if hasattr(self.robot.policy, 'get_X'):
                self.Xs.append(self.robot.policy.get_X())
            if hasattr(self.robot.policy, 'traj'):
                self.trajs.append(self.robot.policy.get_traj())

            # update all agents
            self.robot.step(action)

            if self.global_frame == len(self.trajnet_samples):
                self.global_frame = 0

            if self.trajnet:
                sample = self.trajnet_samples[self.global_frame]
                for i in range(len(sample)):
                    dist = np.sqrt(
                        np.square(self.robot.px - float(sample[i][1])) +
                        np.square(self.robot.py - float(sample[i][2])))
                    sample[i].append(dist)
                sample = sorted(sample, key=itemgetter(3))
                sample = sample[:self.human_num - 1]

                for i in range(human_num):
                    if len(sample) > i:
                        self.humans[i].human_id = sample[i][0]
                        new_position = [
                            float(sample[i][1]),
                            float(sample[i][2])
                        ]
                        new_velocity = [(new_position[0] - self.humans[i].px) /
                                        self.time_step,
                                        (new_position[1] - self.humans[i].py) /
                                        self.time_step]

                        self.humans[i].set_position(new_position)
                        self.humans[i].set_velocity(new_velocity)
                    else:
                        self.humans[i].step(human_actions[i])
            else:
                for human, action in zip(self.humans, human_actions):
                    human.step(action)
                    if self.nonstop_human and human.reached_destination():
                        self.generate_human(human)

            self.global_frame += 1

            self.global_time += self.time_step
            self.states.append([
                self.robot.get_full_state(),
                [human.get_full_state() for human in self.humans],
                [human.id for human in self.humans]
            ])
            self.robot_actions.append(action)
            self.rewards.append(reward)

            # compute the observation
            if self.robot.sensor == 'coordinates':
                ob = self.compute_observation_for(self.robot)
            elif self.robot.sensor == 'RGB':
                raise NotImplementedError
        else:
            if self.robot.sensor == 'coordinates':
                if self.trajnet:
                    index = self.global_frame + 1
                    if index > len(self.trajnet_samples):
                        index = 0
                    sample = self.trajnet_samples[index]

                    for i in range(len(sample)):
                        dist = np.sqrt(
                            np.square(self.robot.px - float(sample[i][1])) +
                            np.square(self.robot.py - float(sample[i][2])))
                        sample[i].append(dist)
                    sample = sorted(sample, key=itemgetter(3))
                    sample = sample[:self.human_num - 1]
                    ob = []
                    for i in range(human_num):
                        if len(sample) > i:
                            self.humans[i].human_id = sample[i][0]
                            new_position = [
                                float(sample[i][1]),
                                float(sample[i][2])
                            ]
                            new_velocity = [
                                (new_position[0] - self.humans[i].px) /
                                self.time_step,
                                (new_position[1] - self.humans[i].py) /
                                self.time_step
                            ]
                        ob.append(
                            ObservableState(self.humans[i].px,
                                            self.humans[i].py,
                                            self.humans[i].vx,
                                            self.humans[i].vy,
                                            self.humans[i].radius))
                else:
                    ob = [
                        human.get_next_observable_state(action)
                        for human, action in zip(self.humans, human_actions)
                    ]
            elif self.robot.sensor == 'RGB':
                raise NotImplementedError

        return ob, reward, done, info
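The collision check above relies on point_to_segment_dist(px, py, ex, ey, x, y): the distance from point (x, y) to the segment from (px, py) to (ex, ey); here the point is the robot at the origin and the segment is the human's position relative to the robot swept over one time step. A minimal sketch of such a helper (the project's own implementation may differ in details):

import numpy as np

def point_to_segment_dist(x1, y1, x2, y2, x3, y3):
    # Distance from point (x3, y3) to the segment from (x1, y1) to (x2, y2).
    dx, dy = x2 - x1, y2 - y1
    if dx == 0 and dy == 0:
        return np.hypot(x3 - x1, y3 - y1)
    # Projection of the point onto the segment, clamped to its endpoints.
    u = ((x3 - x1) * dx + (y3 - y1) * dy) / (dx * dx + dy * dy)
    u = min(max(u, 0.0), 1.0)
    return np.hypot(x3 - (x1 + u * dx), y3 - (y1 + u * dy))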
Example #19
 def get_observable_state(self):
     return ObservableState(self.px, self.py, self.vx, self.vy, self.radius)
Example #20
    def predict(self, state):
        """
        A base class for all methods that takes pairwise joint state as input to value network.
        The input to the value network is always of shape (batch_size, # humans, rotated joint state length)

        """
        if self.phase is None or self.device is None:
            raise AttributeError('Phase, device attributes have to be set!')
        if self.phase == 'train' and self.epsilon is None:
            raise AttributeError(
                'Epsilon attribute has to be set in training phase')

        if self.reach_destination(state):
            return ActionXY(
                0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
        if self.action_space is None:
            self.build_action_space(state.self_state.v_pref)

        occupancy_maps = None
        probability = np.random.random()
        if self.phase == 'train' and probability < self.epsilon:
            max_action = self.action_space[np.random.choice(
                len(self.action_space))]
        else:
            self.action_values = list()
            max_value = float('-inf')
            max_action = None
            for action in self.action_space:
                next_self_state = self.propagate(state.self_state, action)
                if self.query_env:
                    next_human_states, reward, done, info = self.env.onestep_lookahead(
                        action)
                else:
                    next_human_states = [
                        self.propagate(
                            human_state,
                            ActionXY(human_state.vx, human_state.vy))
                        for human_state in state.human_states
                    ]
                    reward = self.compute_reward(next_self_state,
                                                 next_human_states)
                if len(next_human_states) == 0:
                    next_human_states = [ObservableState(0, 0, 0, 0, 0)]
                batch_next_states = torch.cat([
                    torch.Tensor([next_self_state + next_human_state]).to(
                        self.device) for next_human_state in next_human_states
                ],
                                              dim=0)
                rotated_batch_input = self.rotate(batch_next_states).unsqueeze(
                    0)
                if self.with_om:
                    if occupancy_maps is None:
                        occupancy_maps = self.build_occupancy_maps(
                            next_human_states, state.self_state).unsqueeze(0)
                    rotated_batch_input = torch.cat(
                        [rotated_batch_input,
                         occupancy_maps.to(self.device)],
                        dim=2)
                # VALUE UPDATE
                next_state_value = self.model(rotated_batch_input).data.item()
                value = reward + pow(
                    self.gamma, self.time_step *
                    state.self_state.v_pref) * next_state_value
                self.action_values.append(value)
                if value > max_value:
                    max_value = value
                    max_action = action
            if max_action is None:
                raise ValueError('Value network is not well trained. ')

        if self.phase == 'train':
            self.last_state = self.transform(state)

        return max_action
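The backup at the end of the loop discounts by gamma raised to time_step * v_pref, so the effective per-step discount scales with how far the agent could travel in one step. A tiny numeric illustration of that line (all values made up):

gamma, time_step, v_pref = 0.9, 0.25, 1.0
reward, next_state_value = 0.0, 0.8

value = reward + pow(gamma, time_step * v_pref) * next_state_value
print(round(value, 3))  # 0.8 * 0.9 ** 0.25 = 0.779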
Example #21
def main():
    parser = argparse.ArgumentParser('Parse configuration file')
    parser.add_argument('--env_config', type=str, default='configs/env.config')
    parser.add_argument('--policy_config',
                        type=str,
                        default='configs/policy.config')
    parser.add_argument('--policy', type=str, default='orca')
    parser.add_argument('--model_dir', type=str, default=None)
    parser.add_argument('--il', default=False, action='store_true')
    parser.add_argument('--gpu', default=False, action='store_true')
    parser.add_argument('--visualize', default=True, action='store_true')
    parser.add_argument('--phase', type=str, default='test')
    parser.add_argument('--test_case', type=int, default=0)
    parser.add_argument('--square', default=False, action='store_true')
    parser.add_argument('--circle', default=False, action='store_true')
    parser.add_argument('--video_file', type=str, default=None)
    parser.add_argument('--traj', default=False, action='store_true')
    parser.add_argument('--plot', default=False, action='store_true')
    args = parser.parse_args()

    if args.model_dir is not None:
        env_config_file = os.path.join(args.model_dir,
                                       os.path.basename(args.env_config))
        policy_config_file = os.path.join(args.model_dir,
                                          os.path.basename(args.policy_config))
        if args.il:
            model_weights = os.path.join(args.model_dir, 'il_model.pth')
        else:
            if os.path.exists(
                    os.path.join(args.model_dir, 'resumed_rl_model.pth')):
                model_weights = os.path.join(args.model_dir,
                                             'resumed_rl_model.pth')
            else:
                model_weights = os.path.join(args.model_dir, 'rl_model.pth')
    else:
        env_config_file = args.env_config
        policy_config_file = args.policy_config

    # configure logging and device
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s, %(levelname)s: %(message)s',
                        datefmt="%Y-%m-%d %H:%M:%S")
    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and args.gpu else "cpu")
    logging.info('Using device: %s', device)

    # configure policy
    policy = policy_factory[args.policy]()
    policy_config = configparser.RawConfigParser()
    policy_config.read(policy_config_file)
    policy.configure(policy_config)
    if policy.trainable:
        if args.model_dir is None:
            parser.error(
                'Trainable policy must be specified with a model weights directory'
            )
        policy.get_model().load_state_dict(
            torch.load(model_weights, map_location=torch.device('cpu')))

    # configure environment
    env_config = configparser.RawConfigParser()
    env_config.read(env_config_file)
    env = gym.make('CrowdSim-v0')
    env.configure(env_config)
    if args.square:
        env.test_sim = 'square_crossing'
    if args.circle:
        env.test_sim = 'circle_crossing'
    robot = Robot(env_config, 'robot')
    robot.set_policy(policy)
    env.set_robot(robot)
    explorer = Explorer(env, robot, device, gamma=0.9)

    policy.set_phase(args.phase)
    policy.set_device(device)
    # set safety space for ORCA in non-cooperative simulation
    if isinstance(robot.policy, ORCA):
        if robot.visible:
            robot.policy.safety_space = 0
        else:
            # because invisible case breaks the reciprocal assumption
            # adding some safety space improves ORCA performance. Tune this value based on your need.
            robot.policy.safety_space = 0
        logging.info('ORCA agent buffer: %f', robot.policy.safety_space)

    policy.set_env(env)
    robot.print_info()

    if args.visualize:

        obs = env.reset(args.phase, args.test_case)
        done = False
        last_pos = np.array(robot.get_position())
        policy_time = 0.0
        numFrame = 0

        dist = 20.0
        obs_flg = 0

        sim_time = False
        while sim_time == False:
            samples, robot_state, sim_time = pc2obs.pc2obs(
                voxel_size=voxel_size)
        t1 = float(sim_time)
        while (dist > 0.8):
            #t1 = time.time()
            env.humans = []
            samples, robot_state, sim_time = pc2obs.pc2obs(
                voxel_size=voxel_size)
            if isinstance(samples, bool):
                print("Not connected")
                continue
            dist = math.sqrt((GOAL_X - robot_state[0])**2 +
                             (GOAL_Y - robot_state[1])**2)
            if obs_flg == 0 and dist < 10:
                os.system('sh ./init.sh')
                obs_flg = 1
            print("dist: {}".format(dist))
            # rotate and shift obs position
            t2 = time.time()
            yaw = robot_state[2]
            rot_matrix = np.array([[math.cos(yaw),
                                    math.sin(yaw)],
                                   [-math.sin(yaw),
                                    math.cos(yaw)]])
            #samples = np.array([sample + robot_state[0:2] for sample in samples[:,0:2]])

            if len(samples) == 1:
                samples = np.array(
                    [np.dot(rot_matrix, samples[0][0:2]) + robot_state[0:2]])
                print(samples)
            elif len(samples) > 1:
                samples = np.array([
                    np.dot(rot_matrix, sample) + robot_state[0:2]
                    for sample in samples[:, 0:2]
                ])

            obs_position_list = samples
            obs = []

            for ob in obs_position_list:
                human = Human(env.config, 'humans')
                # px, py, gx, gy, vx, vy, theta
                human.set(ob[0], ob[1], ob[0], ob[1], 0, 0, 0)
                env.humans.append(human)
                # px, py, vx, vy, radius
                obs.append(ObservableState(ob[0], ob[1], 0, 0, voxel_size / 2))
            if len(obs_position_list) == 0:
                human = Human(env.config, 'humans')
                # SARL, CADRL
                human.set(0, -10, 0, -10, 0, 0, 0)
                # LSTM
                #human.set(robot_state[0]+10, robot_state[1]+10, robot_state[0]+10, robot_state[1]+10 , 0, 0, 0)
                env.humans.append(human)
                # SARL, CADRL
                obs.append(ObservableState(0, -10, 0, 0, voxel_size / 2))
                # LSTM
                #obs.append(ObservableState(robot_state[0]+10, robot_state[1]+10, 0, 0, voxel_size/2))
            robot.set_position(robot_state)
            robot.set_velocity([math.sin(yaw), math.cos(yaw)])
            #print(obs)
            action = robot.act(obs)
            obs, _, done, info = env.step(action)
            current_pos = np.array(robot.get_position())
            current_vel = np.array(robot.get_velocity())
            #print("Velocity: {}, {}".format(current_vel[0], current_vel[1]))
            logging.debug(
                'Speed: %.2f',
                np.linalg.norm(current_pos - last_pos) / robot.time_step)
            last_pos = current_pos
            policy_time += time.time() - t1
            numFrame += 1
            #print(t2-t1, time.time() - t2)

            diff_angle = (-yaw + math.atan2(current_vel[0], current_vel[1]))
            if diff_angle > math.pi:
                diff_angle = diff_angle - 2 * math.pi
            elif diff_angle < -math.pi:
                diff_angle = diff_angle + 2 * math.pi
            #print("diff_angle: {}, {}, {}".format(diff_angle, yaw ,-math.atan2(current_vel[0], current_vel[1])))
            if diff_angle < -0.7:
                direc = 2  # turn left
            elif diff_angle > 0.7:
                direc = 3  # turn right
            else:
                direc = 1  # go straight
            # GoEasy(direc)
            vx = SPEED * math.sqrt(current_vel[0]**2 + current_vel[1]**2)
            if diff_angle > 0:
                v_ang = ANGULAR_SPEED * min(diff_angle / (math.pi / 2), 1)
            else:
                v_ang = ANGULAR_SPEED * max(diff_angle / (math.pi / 2), -1)
            print(vx, -v_ang)
            easyGo.mvCurve(vx, -v_ang)
            if args.plot:
                plt.scatter(current_pos[0], current_pos[1], label='robot')
                plt.arrow(current_pos[0],
                          current_pos[1],
                          current_vel[0],
                          current_vel[1],
                          width=0.05,
                          fc='g',
                          ec='g')
                plt.arrow(current_pos[0],
                          current_pos[1],
                          math.sin(yaw),
                          math.cos(yaw),
                          width=0.05,
                          fc='r',
                          ec='r')
                if len(samples) == 1:
                    plt.scatter(samples[0][0],
                                samples[0][1],
                                label='obstacles')
                elif len(samples) > 1:
                    plt.scatter(samples[:, 0],
                                samples[:, 1],
                                label='obstacles')
                plt.xlim(-6.5, 6.5)
                plt.ylim(-9, 4)
                plt.legend()
                plt.title("gazebo test")
                plt.xlabel("x (m)")
                plt.ylabel("y (m)")
                plt.pause(0.001)
                plt.cla()
                plt.clf()
            print("NAV TIME {}".format(float(sim_time) - t1))
            time.sleep(0.5)
        print("NAV TIME {}".format(float(sim_time) - t1))
        easyGo.stop()
        plt.close()
        print("Average took {} sec per iteration, {} Frame".format(
            policy_time / numFrame, numFrame))
    else:
        explorer.run_k_episodes(env.case_size[args.phase],
                                args.phase,
                                print_failure=True)