    def estimate_reward(self, state, action):
        """ If the time step is small enough, it is reasonable to model each agent as moving linearly during this period.

        """
        # collision detection
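        # The loop below propagates each human's position relative to the robot over one
        # time step (assuming straight-line motion, per the docstring above) and measures
        # how close that relative-motion segment comes to the robot at the origin.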
        if isinstance(state, list) or isinstance(state, tuple):
            state = tensor_to_joint_state(state)
        human_states = state.human_states
        robot_state = state.robot_state

        dmin = float('inf')
        collision = False
        for i, human in enumerate(human_states):
            px = human.px - robot_state.px
            py = human.py - robot_state.py
            if self.kinematics == 'holonomic':
                vx = human.vx - action.vx
                vy = human.vy - action.vy
            else:
                vx = human.vx - action.v * np.cos(action.r + robot_state.theta)
                vy = human.vy - action.v * np.sin(action.r + robot_state.theta)
            ex = px + vx * self.time_step
            ey = py + vy * self.time_step
            # closest distance between boundaries of two agents
            closest_dist = point_to_segment_dist(
                px, py, ex, ey, 0, 0) - human.radius - robot_state.radius
            if closest_dist < 0:
                collision = True
                break
            elif closest_dist < dmin:
                dmin = closest_dist

        # check if reaching the goal
        if self.kinematics == 'holonomic':
            px = robot_state.px + action.vx * self.time_step
            py = robot_state.py + action.vy * self.time_step
        else:
            theta = robot_state.theta + action.r
            px = robot_state.px + np.cos(theta) * action.v * self.time_step
            py = robot_state.py + np.sin(theta) * action.v * self.time_step

        end_position = np.array((px, py))
        reaching_goal = norm(
            end_position -
            np.array([robot_state.gx, robot_state.gy])) < robot_state.radius

        if collision:
            reward = -0.25
        elif reaching_goal:
            reward = 1
        elif dmin < 0.2:
            # adjust the reward based on FPS
            reward = (dmin - 0.2) * 0.5 * self.time_step
        else:
            reward = 0

        return reward

    def step(self, action, simple=True):
        """
        Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info)

        """
        human_actions = []
        for human in self.humans:
            # observation for humans is always coordinates
            ob = [
                other_human.get_observable_state()
                for other_human in self.humans if other_human != human
            ]
            if self.robot.visible:
                ob += [self.robot.get_observable_state()]
            human_actions.append(human.act(ob))

        # collision detection
        dmin = float('inf')
        collision = False
        for i, human in enumerate(self.humans):
            px = human.px - self.robot.px
            py = human.py - self.robot.py
            if self.robot.kinematics == 'holonomic':
                vx = human.vx - action.vx
                vy = human.vy - action.vy
            else:
                vx = human.vx - action.v * np.cos(action.r + self.robot.theta)
                vy = human.vy - action.v * np.sin(action.r + self.robot.theta)
            ex = px + vx * self.time_step
            ey = py + vy * self.time_step
            # closest distance between boundaries of two agents
            closest_dist = point_to_segment_dist(
                px, py, ex, ey, 0, 0) - human.radius - self.robot.radius
            if closest_dist < 0:
                collision = True
                # logging.debug("Collision: distance between robot and p{} is {:.2E}".format(i, closest_dist))
                break
            elif closest_dist < dmin:
                dmin = closest_dist

        # collision detection between humans
        human_num = len(self.humans)
        for i in range(human_num):
            for j in range(i + 1, human_num):
                dx = self.humans[i].px - self.humans[j].px
                dy = self.humans[i].py - self.humans[j].py
                dist = (dx**2 + dy**2)**(
                    1 / 2) - self.humans[i].radius - self.humans[j].radius
                if dist < 0:
                    # detect collision but don't take humans' collision into account
                    logging.debug('Collision happens between humans in step()')

        # check if reaching the goal
        end_position = np.array(
            self.robot.compute_position(action, self.time_step))
        dist_goal = norm(end_position -
                         np.array(self.robot.get_goal_position()))
        reaching_goal = dist_goal < self.robot.radius

        if self.global_time >= self.time_limit - 1:
            reward = 0
            done = True
            info = Timeout()
        elif collision:
            reward = self.collision_penalty
            done = True
            info = Collision()
        elif reaching_goal:
            reward = self.success_reward
            done = True
            info = ReachGoal()
        elif dmin < self.discomfort_dist:
            # only penalize agent for getting too close if it's visible
            # adjust the reward based on FPS
            reward = (
                dmin - self.discomfort_dist
            ) * self.discomfort_penalty_factor * self.time_step - self.time_r
            done = False
            info = Danger(dmin)
        else:
            reward = -0.1 * dist_goal
            done = False
            info = Nothing()

        # store state, action value and attention weights
        self.states.append([
            self.robot.get_full_state(),
            [human.get_full_state() for human in self.humans]
        ])
        if hasattr(self.robot.policy, 'action_values'):
            self.action_values.append(self.robot.policy.action_values)
        if hasattr(self.robot.policy, 'get_attention_weights'):
            self.attention_weights.append(
                self.robot.policy.get_attention_weights())

        # update all agents
        self.robot.step(action)
        for i, human_action in enumerate(human_actions):
            self.humans[i].step(human_action)
        self.global_time += self.time_step
        for i, human in enumerate(self.humans):
            # only record the first time the human reaches the goal
            if self.human_times[i] == 0 and human.reached_destination():
                self.human_times[i] = self.global_time

        # compute the observation
        if simple:
            return self.robot.get_full_state()

        if self.robot.sensor == 'coordinates':
            ob = [
                self.robot.get_full_state() + human.get_observable_state()
                for human in self.humans
            ]
        elif self.robot.sensor == 'RGB':
            raise NotImplementedError

        return ob, reward, done, info
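Every collision check above calls a point_to_segment_dist helper that is imported from elsewhere in the repository; the call sites pass the endpoints of the relative-motion segment first and the query point (0, 0), i.e. the robot, last. A minimal sketch under that assumed signature (a hypothetical stand-in, not the repository's actual implementation):

import numpy as np

def point_to_segment_dist(x1, y1, x2, y2, x3, y3):
    # Closest distance from the point (x3, y3) to the segment from (x1, y1) to (x2, y2).
    dx, dy = x2 - x1, y2 - y1
    if dx == 0 and dy == 0:
        # degenerate segment: distance to the single endpoint
        return np.hypot(x3 - x1, y3 - y1)
    # projection of the point onto the segment, clamped to the segment's extent
    u = ((x3 - x1) * dx + (y3 - y1) * dy) / (dx * dx + dy * dy)
    u = min(max(u, 0.0), 1.0)
    cx, cy = x1 + u * dx, y1 + u * dy
    return np.hypot(x3 - cx, y3 - cy)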
Example #3
    def step(self, action, update=True, rebuild=True):
        """
        Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info)

        """
        human_actions = []
        for human in self.humans:
            # observation for humans is always coordinates
            ob = [other_human.get_observable_state() for other_human in self.humans if other_human != human]
            if self.robot.visible:
                ob += [self.robot.get_observable_state()]
            human_actions.append(human.act(ob))

        # collision detection
        dmin = float('inf')
        collision = False
        for i, human in enumerate(self.humans):
            px = human.px - self.robot.px
            py = human.py - self.robot.py
            if self.robot.kinematics == 'holonomic':
                vx = human.vx - action.vx
                vy = human.vy - action.vy
            else:
                vx = human.vx - action.v * np.cos(action.r + self.robot.theta)
                vy = human.vy - action.v * np.sin(action.r + self.robot.theta)
            ex = px + vx * self.time_step
            ey = py + vy * self.time_step
            # closest distance between boundaries of two agents
            closest_dist = point_to_segment_dist(px, py, ex, ey, 0, 0) - human.radius - self.robot.radius
            if closest_dist < 0:
                collision = True
                # logging.debug("Collision: distance between robot and p{} is {:.2E}".format(i, closest_dist))
                break
            elif closest_dist < dmin:
                dmin = closest_dist

        # begin = time()
        self.map.build_map_cpu(self.humans, rebuild)
        # if not update:
        #     print("rebuild: ", rebuild, "build map time: ", 81 * (time() - begin))

        agent_fullstate = FullState(self.robot.px + action.vx * self.agent_timestep,
                                    self.robot.py + action.vy * self.agent_timestep, self.robot.vx, self.robot.vy,
                                    self.robot.radius, self.robot.gx, self.robot.gy, self.robot.v_pref,
                                    self.robot.theta)

        # begin = time()
        collision_probability = self.map.compute_occupied_probability(agent_fullstate)
        # if not update:
        #     print("rebuild: ", rebuild, "build map time: ", 81 * (time() - begin))

        stationary_dist = ((self.robot.py + self.circle_radius)**2 + (self.robot.px**2))**(1 / 2)

        # collision detection between humans
        human_num = len(self.humans)
        for i in range(human_num):
            for j in range(i + 1, human_num):
                dx = self.humans[i].px - self.humans[j].px
                dy = self.humans[i].py - self.humans[j].py
                dist = (dx**2 + dy**2)**(1 / 2) - self.humans[i].radius - self.humans[j].radius
                if dist < 0:
                    # detect collision but don't take humans' collision into account
                    logging.debug('Collision happens between humans in step()')

        # check if reaching the goal
        end_position = np.array(self.robot.compute_position(action, self.time_step))
        reaching_goal = norm(end_position - np.array(self.robot.get_goal_position())) < self.robot.radius

        stationary_state = False
        if self.robot.kinematics == 'holonomic':
            if abs(action.vx) <= 0.0001 and abs(action.vy) <= 0.0001:
                stationary_state = True
        else:
            if abs(action.v) <= 0.0001:
                stationary_state = True

        if self.global_time >= self.time_limit - 1:
            reward = 0
            done = True
            info = Timeout()
        elif collision:
            reward = self.collision_penalty
            done = True
            info = Collision()
        elif reaching_goal:
            reward = self.success_reward
            done = True
            info = ReachGoal()
        else:
            reward = -0.25 * collision_probability * self.reward_increment
            done = False
            if dmin < self.discomfort_dist:
                info = Danger(dmin)
            else:
                info = Nothing()

        if update:
            # store state, action value and attention weights
            self.states.append([self.robot.get_full_state(), [human.get_full_state() for human in self.humans]])
            if hasattr(self.robot.policy, 'action_values'):
                self.action_values.append(self.robot.policy.action_values)
            if hasattr(self.robot.policy, 'get_attention_weights'):
                self.attention_weights.append(self.robot.policy.get_attention_weights())

            # update all agents
            self.robot.step(action)
            for i, human_action in enumerate(human_actions):
                self.humans[i].step(human_action)
            self.global_time += self.time_step
            for i, human in enumerate(self.humans):
                # only record the first time the human reaches the goal
                if self.human_times[i] == 0 and human.reached_destination():
                    self.human_times[i] = self.global_time

            # compute the observation
            if self.robot.sensor == 'coordinates':
                ob = [human.get_observable_state() for human in self.humans]
            elif self.robot.sensor == 'RGB':
                raise NotImplementedError
        else:
            if self.robot.sensor == 'coordinates':
                ob = [human.get_next_observable_state(action) for human, action in zip(self.humans, human_actions)]
            elif self.robot.sensor == 'RGB':
                raise NotImplementedError

        # self.agent_prev_vx = action.vx
        # self.agent_prev_vy = action.vy
        if self.robot.kinematics == 'holonomic':
            self.agent_prev_vx = action.vx
            self.agent_prev_vy = action.vy
        else:
            self.agent_prev_vx = action.v * np.cos(action.r + self.robot.theta)
            self.agent_prev_vy = action.v * np.sin(action.r + self.robot.theta)

        return ob, reward, done, info
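Unlike the other variants, this step() shapes the dense reward with an occupancy probability looked up from a map built over the humans (build_map_cpu / compute_occupied_probability); the map implementation itself is not shown. A rough sketch of the idea, using a hypothetical SimpleOccupancyMap with a Gaussian falloff around each human:

import numpy as np

class SimpleOccupancyMap:
    """Hypothetical stand-in: occupancy modelled as a Gaussian bump around each human."""

    def __init__(self, sigma=0.5):
        self.sigma = sigma
        self.centres = []   # list of (px, py, radius)

    def build_map_cpu(self, humans, rebuild=True):
        if rebuild:
            self.centres = [(h.px, h.py, h.radius) for h in humans]

    def compute_occupied_probability(self, robot_state):
        # probability-like score in [0, 1]: 1 when on top of a human, decaying with distance
        prob = 0.0
        for px, py, radius in self.centres:
            d = np.hypot(robot_state.px - px, robot_state.py - py) - radius
            prob = max(prob, float(np.exp(-max(d, 0.0) ** 2 / (2 * self.sigma ** 2))))
        return prob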
    def step(self, action, update=True):
        """
        Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info)
        """
        # get human actions
        if self.centralized_planning:
            agent_states = [human.get_full_state() for human in self.humans]
            if self.robot.visible:
                agent_states.append(self.robot.get_full_state())
                human_actions = self.centralized_planner.predict(agent_states)[:-1]
            else:
                human_actions = self.centralized_planner.predict(agent_states)
        else:
            human_actions = []
            for human in self.humans:
                ob = self.compute_observation_for(human)
                human_actions.append(human.act(ob))

        # social score violation calculation
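        # AgentHeadingRect (judging from its name and constructor arguments) appears to model a
        # rectangle projected along each agent's current heading; an intersection between the
        # robot's rectangle and a human's is counted below as one social heading violation.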
        heading_rect_violations = 0
        robot_rect = AgentHeadingRect(self.robot.px, self.robot.py, self.robot.radius, self.robot.vx, self.robot.vy, self.robot.kinematics, action)
        for human in self.humans:
            human_rect = AgentHeadingRect(human.px, human.py, human.radius, human.vx, human.vy, human.kinematics)
            if robot_rect.intersects(human_rect):
                heading_rect_violations += 1
            

        # collision detection
        dmin = float('inf')
        collision = False
        
        for i, human in enumerate(self.humans):
            px = human.px - self.robot.px
            py = human.py - self.robot.py
            if self.robot.kinematics == 'holonomic':
                vx = human.vx - action.vx
                vy = human.vy - action.vy
            else:
                vx = human.vx - action.v * np.cos(action.r + self.robot.theta)
                vy = human.vy - action.v * np.sin(action.r + self.robot.theta)
            ex = px + vx * self.time_step
            ey = py + vy * self.time_step
            # closest distance between boundaries of two agents
            closest_dist = point_to_segment_dist(px, py, ex, ey, 0, 0) - human.radius - self.robot.radius
            if closest_dist < 0:
                collision = True
                logging.debug("Collision: distance between robot and p{} is {:.2E} at time {:.2E}".format(human.id, closest_dist, self.global_time))
                break
            elif closest_dist < dmin:
                dmin = closest_dist

        # collision detection between humans
        human_num = len(self.humans)
        for i in range(human_num):
            for j in range(i + 1, human_num):
                dx = self.humans[i].px - self.humans[j].px
                dy = self.humans[i].py - self.humans[j].py
                dist = (dx ** 2 + dy ** 2) ** (1 / 2) - self.humans[i].radius - self.humans[j].radius
                if dist < 0:
                    # detect collision but don't take humans' collision into account
                    logging.debug('Collision happens between humans in step()')

        # check if reaching the goal
        end_position = np.array(self.robot.compute_position(action, self.time_step))
        reaching_goal = norm(end_position - np.array(self.robot.get_goal_position())) < self.robot.radius

        # calculate step jerk cost
        ax = action.vx - self.robot.vx
        ay = action.vy - self.robot.vy
        d_ax = ax - self.last_acceleration[0]
        d_ay = ay - self.last_acceleration[1]
        jerk_cost = d_ax**2 + d_ay**2
        self.last_acceleration = (ax, ay)

        # check for left-hand / right-hand rules
        side_preference = None
        if self.test_scenario in self.all_two_agent_scenarios:
            h = self.humans[0]
            # If the robot's end position falls within the y-range occupied by the human,
            # the two agents are passing each other; record which side the robot ends up on
            # (0 if it finishes on the smaller-x side of the human, 1 otherwise).
            if (h.py - h.radius) <= end_position[1] <= (h.py + h.radius):
                if end_position[0] < h.px:
                    side_preference = 0
                else:
                    side_preference = 1

        # state information logging
        info = {}
        info['aggregated_time'] = 1
        info['min_separation'] = dmin
        info['social_violation_cnt'] = heading_rect_violations
        info['jerk_cost'] = jerk_cost
        info['speed'] = math.sqrt(action.vx**2 + action.vy**2)
        info['side_preference'] = side_preference

        if dmin < self.min_personal_space:
            info['personal_violation_cnt'] = 1
        else:
            info['personal_violation_cnt'] = 0

        if self.global_time >= self.time_limit - 1:
            info['aggregated_time'] = math.inf
            reward = 0
            done = True
            info['event'] = Timeout()
        elif collision:
            info['aggregated_time'] = math.inf
            reward = self.collision_penalty
            done = True
            info['event'] = Collision()
        elif reaching_goal:
            reward = self.success_reward
            done = True
            info['event'] = ReachGoal()
        elif dmin < self.discomfort_dist:
            # adjust the reward based on FPS
            reward = (dmin - self.discomfort_dist) * self.discomfort_penalty_factor * self.time_step
            done = False
            info['event'] = Discomfort(dmin)
        else:
            reward = 0
            done = False
            info['event'] = Nothing()
        
        for human in self.humans:
            if not human.reached_destination():
                info['aggregated_time'] += 1

        # update environment
        if update:
            # store state, action value and attention weights
            if hasattr(self.robot.policy, 'action_values'):
                self.action_values.append(self.robot.policy.action_values)
            if hasattr(self.robot.policy, 'get_attention_weights'):
                self.attention_weights.append(self.robot.policy.get_attention_weights())
            if hasattr(self.robot.policy, 'get_matrix_A'):
                self.As.append(self.robot.policy.get_matrix_A())
            if hasattr(self.robot.policy, 'get_feat'):
                self.feats.append(self.robot.policy.get_feat())
            if hasattr(self.robot.policy, 'get_X'):
                self.Xs.append(self.robot.policy.get_X())
            if hasattr(self.robot.policy, 'traj'):
                self.trajs.append(self.robot.policy.get_traj())

            # update all agents
            self.robot.step(action)
            for human, human_action in zip(self.humans, human_actions):
                human.step(human_action)
                if self.nonstop_human and human.reached_destination():
                    self.generate_human(human)

            self.global_time += self.time_step
            self.states.append([self.robot.get_full_state(), [human.get_full_state() for human in self.humans],
                                [human.id for human in self.humans]])
            self.robot_actions.append(action)
            self.rewards.append(reward)
            self.infos.append(info)

            # compute the observation
            if self.robot.sensor == 'coordinates':
                ob = self.compute_observation_for(self.robot)
            elif self.robot.sensor == 'RGB':
                raise NotImplementedError
        else:
            if self.robot.sensor == 'coordinates':
                ob = [human.get_next_observable_state(action) for human, action in zip(self.humans, human_actions)]
            elif self.robot.sensor == 'RGB':
                raise NotImplementedError

        return ob, reward, done, info
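The jerk cost logged above is the squared change in the per-step acceleration, where acceleration is itself approximated as the change in commanded velocity. A small self-contained sketch of the same bookkeeping over a whole velocity sequence, with a hypothetical helper name and a zero initial acceleration as a simplifying assumption:

def jerk_costs(velocities):
    """Per-step jerk costs for a sequence of holonomic velocities [(vx, vy), ...]."""
    costs = []
    last_acc = (0.0, 0.0)  # assumed initial acceleration
    for (vx0, vy0), (vx1, vy1) in zip(velocities, velocities[1:]):
        ax, ay = vx1 - vx0, vy1 - vy0          # acceleration ~ velocity difference
        d_ax, d_ay = ax - last_acc[0], ay - last_acc[1]
        costs.append(d_ax ** 2 + d_ay ** 2)    # squared change in acceleration, as in step()
        last_acc = (ax, ay)
    return costs

# a robot that accelerates once and then cruises incurs jerk only around the change
print(jerk_costs([(0.0, 0.0), (1.0, 0.0), (1.0, 0.0), (1.0, 0.0)]))  # [1.0, 1.0, 0.0]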
    def step(self, action, update=True):
        """
        Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info)
        """
        if self.centralized_planning:
            agent_states = [human.get_full_state() for human in self.humans]
            if self.robot.visible:
                agent_states.append(self.robot.get_full_state())
                human_actions = self.centralized_planner.predict(
                    agent_states)[:-1]
            else:
                human_actions = self.centralized_planner.predict(agent_states)
        else:
            human_actions = []
            for human in self.humans:
                ob = self.compute_observation_for(human)
                human_actions.append(human.act(ob))

        # collision detection
        dmin = float('inf')
        collision = False
        for i, human in enumerate(self.humans):
            px = human.px - self.robot.px
            py = human.py - self.robot.py
            if self.robot.kinematics == 'holonomic':
                vx = human.vx - action.vx
                vy = human.vy - action.vy
            else:
                vx = human.vx - action.v * np.cos(action.r + self.robot.theta)
                vy = human.vy - action.v * np.sin(action.r + self.robot.theta)
            ex = px + vx * self.time_step
            ey = py + vy * self.time_step
            # closest distance between boundaries of two agents
            closest_dist = point_to_segment_dist(
                px, py, ex, ey, 0, 0) - human.radius - self.robot.radius
            if closest_dist < 0:
                collision = True
                logging.debug(
                    "Collision: distance between robot and p{} is {:.2E} at time {:.2E}"
                    .format(human.id, closest_dist, self.global_time))
                break
            elif closest_dist < dmin:
                dmin = closest_dist

        # collision detection between humans
        human_num = len(self.humans)
        for i in range(human_num):
            for j in range(i + 1, human_num):
                dx = self.humans[i].px - self.humans[j].px
                dy = self.humans[i].py - self.humans[j].py
                dist = (dx**2 + dy**2)**(
                    1 / 2) - self.humans[i].radius - self.humans[j].radius
                if dist < 0:
                    # detect collision but don't take humans' collision into account
                    logging.debug('Collision happens between humans in step()')

        # check if reaching the goal
        end_position = np.array(
            self.robot.compute_position(action, self.time_step))
        reaching_goal = norm(
            end_position -
            np.array(self.robot.get_goal_position())) < self.robot.radius

        if self.global_time >= self.time_limit - 1:
            reward = 0
            done = True
            info = Timeout()
        elif collision:
            reward = self.collision_penalty
            done = True
            info = Collision()
        elif reaching_goal:
            reward = self.success_reward
            done = True
            info = ReachGoal()
        elif dmin < self.discomfort_dist:
            # adjust the reward based on FPS
            reward = (dmin - self.discomfort_dist
                      ) * self.discomfort_penalty_factor * self.time_step
            done = False
            info = Discomfort(dmin)
        else:
            reward = 0
            done = False
            info = Nothing()

        if update:
            # store state, action value and attention weights
            if hasattr(self.robot.policy, 'action_values'):
                self.action_values.append(self.robot.policy.action_values)
            if hasattr(self.robot.policy, 'get_attention_weights'):
                self.attention_weights.append(
                    self.robot.policy.get_attention_weights())
            if hasattr(self.robot.policy, 'get_matrix_A'):
                self.As.append(self.robot.policy.get_matrix_A())
            if hasattr(self.robot.policy, 'get_feat'):
                self.feats.append(self.robot.policy.get_feat())
            if hasattr(self.robot.policy, 'get_X'):
                self.Xs.append(self.robot.policy.get_X())
            if hasattr(self.robot.policy, 'traj'):
                self.trajs.append(self.robot.policy.get_traj())

            # update all agents
            self.robot.step(action)
            for human, human_action in zip(self.humans, human_actions):
                human.step(human_action)
                if self.nonstop_human and human.reached_destination():
                    self.generate_human(human)

            self.global_time += self.time_step
            self.states.append([
                self.robot.get_full_state(),
                [human.get_full_state() for human in self.humans],
                [human.id for human in self.humans]
            ])
            self.robot_actions.append(action)
            self.rewards.append(reward)

            # compute the observation
            if self.robot.sensor == 'coordinates':
                ob = self.compute_observation_for(self.robot)
            elif self.robot.sensor == 'RGB':
                raise NotImplementedError
        else:
            if self.robot.sensor == 'coordinates':
                ob = [
                    human.get_next_observable_state(action)
                    for human, action in zip(self.humans, human_actions)
                ]
            elif self.robot.sensor == 'RGB':
                raise NotImplementedError

        return ob, reward, done, info
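In the Discomfort branch above, the penalty is scaled by time_step ("adjust the reward based on FPS"), so the penalty accumulated per second of staying too close is independent of the simulation rate. A quick numeric check with hypothetical parameter values:

# per-step penalty = (dmin - discomfort_dist) * discomfort_penalty_factor * time_step
dmin, discomfort_dist, discomfort_penalty_factor = 0.1, 0.2, 0.5
for time_step in (0.25, 0.1):
    per_step = (dmin - discomfort_dist) * discomfort_penalty_factor * time_step
    per_second = per_step / time_step   # -0.05 per simulated second in both cases
    print(time_step, per_step, per_second)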
Example #6
    def step(self, action, non_attentive_humans, update=True):
        """
        Compute actions for all agents, detect collision, update environment and return (ob, reward, done, info)

        """

        human_actions = []

        # print(self.global_time)
        # count = self.global_time
        # non_attentive_humans = old_non_attentive_humans
        # if count != 0:
        # #     old_non_attentive_humans = []
        # # else:
        #     old_non_attentive_humans = non_attentive_humans
        # # only record the first time the human reaches the goal
        #
        # if count % 4 == 0:
        #     print("true")
        #
        #     non_attentive_humans = random.sample(self.humans, int(len(self.humans) / 2))
        #     old_non_attentive_humans = non_attentive_humans
        #
        # # else:
        # #     non_attentive_humans = old_non_attentive_humans

        # non_attentive_humans = set(non_attentive_humans)

        for i, human in enumerate(self.humans):
            # observation for humans is always coordinates
            # ob = [other_human.get_observable_state() for other_human in self.humans if other_human != human]

            if human not in non_attentive_humans:
                # print("attentive")
                human.set_attentive()
                ob = [
                    other_human.get_observable_state()
                    for other_human in self.humans if other_human != human
                ]

                # if self.robot.visible:
                ob += [self.robot.get_observable_state()]

            else:
                # print("disattentive")
                ob = [
                    other_human.get_observable_state()
                    for other_human in self.humans if other_human != human
                ]

                human.set_non_attentive()

            # print(human.get_observable_state())

            human_actions.append(human.act(ob))

            if human.reached_destination():
                if np.random.random() > 0.5:
                    sign = -1
                else:
                    sign = 1
                human.set(human.px, human.py,
                          np.random.random() * self.square_width * 0.5 * -sign,
                          (np.random.random() - 0.5) * self.square_width, 0, 0,
                          0)

        # collision detection
        dmin = float('inf')
        collision = False
        ppl_count = 0
        closest_distance_all = []
        for i, human in enumerate(self.humans):
            px = human.px - self.robot.px
            py = human.py - self.robot.py

            if self.robot.kinematics == 'holonomic':
                vx = human.vx - action.vx
                vy = human.vy - action.vy
            else:
                vx = human.vx - action.v * np.cos(action.r + self.robot.theta)
                vy = human.vy - action.v * np.sin(action.r + self.robot.theta)
            ex = px + vx * self.time_step
            ey = py + vy * self.time_step
            # closest distance between boundaries of two agents
            closest_dist = point_to_segment_dist(
                px, py, ex, ey, 0, 0) - human.radius - self.robot.radius
            if closest_dist < 0:
                collision = True
                # logging.debug("Collision: distance between robot and p{} is {:.2E}".format(i, closest_dist))
                # break
            elif closest_dist < dmin:
                dmin = closest_dist
            elif closest_dist < 3:
                ppl_count += 1

        # collision detection between humans
        human_num = len(self.humans)
        for i in range(human_num):
            for j in range(i + 1, human_num):
                dx = self.humans[i].px - self.humans[j].px
                dy = self.humans[i].py - self.humans[j].py
                dist = (dx**2 + dy**2)**(
                    1 / 2) - self.humans[i].radius - self.humans[j].radius
                if dist < 0:
                    # detect collision but don't take humans' collision into account
                    logging.debug('Collision happens between humans in step()')

        # check if reaching the goal
        end_position = np.array(
            self.robot.compute_position(action, self.time_step))
        reaching_goal = norm(
            end_position -
            np.array(self.robot.get_goal_position())) < self.robot.radius

        if self.global_time >= self.time_limit - 1:
            reward = 0
            done = True
            info = Timeout()
        elif collision:
            reward = self.collision_penalty
            done = True
            info = Collision()
        elif reaching_goal:
            reward = self.success_reward
            done = True
            info = ReachGoal()
        elif dmin < self.discomfort_dist:
            # only penalize agent for getting too close if it's visible
            # adjust the reward based on FPS
            reward = (dmin - self.discomfort_dist
                      ) * self.discomfort_penalty_factor * self.time_step
            done = False
            info = Danger(dmin)
        else:
            reward = 0
            done = False
            info = Nothing()

        if update:
            # store state, action value and attention weights
            self.states.append([
                self.robot.get_full_state(),
                [human.get_full_state() for human in self.humans]
            ])
            if hasattr(self.robot.policy, 'action_values'):
                self.action_values.append(self.robot.policy.action_values)
            if hasattr(self.robot.policy, 'get_attention_weights'):
                self.attention_weights.append(
                    self.robot.policy.get_attention_weights())

            # update all agents
            self.robot.step(action)
            for i, human_action in enumerate(human_actions):
                self.humans[i].step(human_action)
            self.global_time += self.time_step
            for i, human in enumerate(self.humans):
                # only record the first time the human reaches the goal
                if self.human_times[i] == 0 and human.reached_destination():
                    self.human_times[i] = self.global_time

            # compute the observation
            if self.robot.sensor == 'coordinates':
                ob = [human.get_observable_state() for human in self.humans]
            elif self.robot.sensor == 'RGB':
                raise NotImplementedError
        else:
            if self.robot.sensor == 'coordinates':
                ob = [
                    human.get_next_observable_state(action)
                    for human, action in zip(self.humans, human_actions)
                ]
            elif self.robot.sensor == 'RGB':
                raise NotImplementedError

        return (ob, reward, done, info, ppl_count, self.robot.get_position(),
                self.robot.get_velocity(), dmin)
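non_attentive_humans is supplied by the caller; the commented-out block at the top of this step() hints that it is resampled every few steps as a random half of the humans. A minimal caller-side sketch of that idea (the helper name is hypothetical; the 4-step interval and 50% fraction are taken from those comments, not from shown code):

import random

def pick_non_attentive(humans, step_count, current=None, interval=4, fraction=0.5):
    # Resample which humans ignore the robot every `interval` steps; otherwise keep the set.
    if current is None or step_count % interval == 0:
        return set(random.sample(humans, int(len(humans) * fraction)))
    return current

# usage (hypothetical): refresh the set, then advance the environment
# non_attentive = pick_non_attentive(env.humans, step_idx, non_attentive)
# ob, reward, done, info, ppl_count, pos, vel, dmin = env.step(robot_action, non_attentive)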
Example #7
    def estimate_reward_on_predictor(self, state, next_state):
        """ If the time step is small enough, it is reasonable to model each agent as moving linearly during this period.
        """
        # collision detection
        info = Nothing()
        if isinstance(state, list) or isinstance(state, tuple):
            state = tensor_to_joint_state(state)
        human_states = state.human_states
        robot_state = state.robot_state
        weight_goal = self.goal_factor
        weight_safe = self.discomfort_penalty_factor
        weight_terminal = 1.0
        re_collision = self.collision_penalty
        re_arrival = self.success_reward
        next_robot_state = next_state.robot_state
        next_human_states = next_state.human_states
        cur_position = np.array((robot_state.px, robot_state.py))
        end_position = np.array((next_robot_state.px, next_robot_state.py))
        goal_position = np.array((robot_state.gx, robot_state.gy))
        reward_goal = (norm(cur_position - goal_position) -
                       norm(end_position - goal_position))
        # check if reaching the goal
        reaching_goal = norm(
            end_position -
            np.array([robot_state.gx, robot_state.gy])) < robot_state.radius
        dmin = float('inf')
        collision = False
        safety_penalty = 0
        if human_states is None or next_human_states is None:
            safety_penalty = 0.0
            collision = False
        else:
            for i, human in enumerate(human_states):
                next_human = next_human_states[i]
                px = human.px - robot_state.px
                py = human.py - robot_state.py
                ex = next_human.px - next_robot_state.px
                ey = next_human.py - next_robot_state.py
                # closest distance between boundaries of two agents
                closest_dist = point_to_segment_dist(
                    px, py, ex, ey, 0, 0) - human.radius - robot_state.radius
                if closest_dist < 0:
                    collision = True
                if closest_dist < dmin:
                    dmin = closest_dist
                if closest_dist < self.discomfort_dist:
                    safety_penalty = safety_penalty + (closest_dist -
                                                       self.discomfort_dist)
            # dis_begin = np.sqrt(px ** 2 + py ** 2) - human.radius - robot_state.radius
            # dis_end = np.sqrt(ex ** 2 + ey ** 2) - human.radius - robot_state.radius
            # penalty_begin = 0
            # penalty_end = 0
            # discomfort_dist = 0.5
            # if dis_begin < discomfort_dist:
            #     penalty_begin = dis_begin - discomfort_dist
            # if dis_end < discomfort_dist:
            #     penalty_end = dis_end - discomfort_dist
            # safety_penalty = safety_penalty + (penalty_end - penalty_begin)
        reward_col = 0
        reward_arrival = 0
        if collision:
            reward_col = re_collision
            info = Collision()
        elif reaching_goal:
            reward_arrival = re_arrival
            info = ReachGoal()
        reward_terminal = reward_col + reward_arrival
        reward = weight_terminal * reward_terminal + weight_goal * reward_goal + weight_safe * safety_penalty
        # if collision:
        # reward = reward - 100
        return reward, info
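Putting the pieces of estimate_reward_on_predictor together: the return value is weight_terminal * (collision/arrival reward) + weight_goal * (decrease in distance to the goal) + weight_safe * (sum of per-human discomfort violations). A self-contained numeric sketch with hypothetical weights and distances:

import numpy as np

def shaped_reward(cur_pos, next_pos, goal, closest_dists, discomfort_dist=0.2,
                  weight_goal=2.0, weight_safe=1.0, weight_terminal=1.0,
                  collision=False, reached=False, re_collision=-0.25, re_arrival=1.0):
    """Same structure as estimate_reward_on_predictor, with hypothetical default weights."""
    # potential-based goal term: positive when the robot moves closer to the goal
    reward_goal = np.linalg.norm(np.subtract(cur_pos, goal)) - np.linalg.norm(np.subtract(next_pos, goal))
    # safety term: negative contribution for every human closer than the discomfort distance
    safety_penalty = sum(d - discomfort_dist for d in closest_dists if d < discomfort_dist)
    reward_terminal = (re_collision if collision else 0.0) + (re_arrival if reached else 0.0)
    return weight_terminal * reward_terminal + weight_goal * reward_goal + weight_safe * safety_penalty

# moving 0.1 m closer to the goal while one human comes within 0.15 m of the robot's boundary
print(shaped_reward((0, 0), (0, 0.1), (0, 4), closest_dists=[0.15, 0.8]))  # ~0.15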