Beispiel #1
0
    def pose_control(self, robot):

        robot.goal = self.look_ahead(robot)

        # distance to goal
        distance = utils.euclidean_distance(robot.states, robot.goal)

        # PID controller for velocity
        velocity_control = self.gains[0] * distance
        # angle control
        angle_to_point = (
            utils.angle_between_points(robot.states, robot.goal) -
            robot.states[2][0] + math.pi) % (2 * math.pi) - math.pi
        beta = (robot.goal[2] - robot.states[2][0] - angle_to_point +
                math.pi) % (2 * math.pi) - math.pi

        # derivative term for angle control
        error_diff = angle_to_point - self.error_old
        self.error_old = angle_to_point

        # maneuver
        if angle_to_point > math.pi / 2 or angle_to_point < -math.pi / 2:
            velocity_control = -velocity_control

        return np.array([
            velocity_control, self.gains[2] * angle_to_point +
            self.gains[3] * beta + self.gains[1] * error_diff
        ])
Beispiel #2
0
 def error(self):
     angle_to_point = (utils.angle_between_points(self.states, self.goal) -
                       self.states[2][0] + math.pi) % (2 *
                                                       math.pi) - math.pi
     beta = (self.goal[2] - self.states[2, 0] - angle_to_point +
             math.pi) % (2.0 * math.pi) - math.pi
     error = np.array([0.0, 0.0, 0.0, 0.0])
     error[0] = self.states[0, 0] - self.goal[0]
     error[1] = self.states[1, 0] - self.goal[1]
     error[2] = -angle_to_point
     error[3] = self.states[3, 0]
     return error
Beispiel #3
0
    def parse(self, obs, prev_action=None):
        active_index = obs['active']
        player_pos = obs['left_team'][active_index]
        player_vel = obs['left_team_direction'][active_index]
        player_tired_factor = obs['left_team_tired_factor'][active_index]

        active_player = np.array([
            player_pos[0], player_pos[1] / 0.42, *player_vel,
            player_tired_factor
        ])

        teammates = []
        for i in range(len(obs["left_team"])):
            # We purposely repeat ourselves to maintain consistency of roles
            i_player_pos = obs['left_team'][i]
            i_player_vel = obs['left_team_direction'][i]
            i_player_tired_factor = obs['left_team_tired_factor'][i]
            i_dist = dist_between_points(player_pos, i_player_pos)
            i_vel_mag = np.linalg.norm(i_dist)
            i_vel_ang = arctan2(i_player_vel[1], i_player_vel[0])
            angle = angle_between_points(player_pos, i_player_pos)
            teammates.append([
                i_player_pos[0], i_player_pos[1] / 0.42, i_dist,
                np.cos(angle),
                np.sin(angle), i_vel_mag,
                np.cos(i_vel_ang),
                np.sin(i_vel_ang), i_player_tired_factor
            ])

        enemy_team = []
        for i in range(len(obs["right_team"])):
            i_player_pos = obs['right_team'][i]
            i_player_vel = obs['right_team_direction'][i]
            i_player_tired_factor = obs['right_team_tired_factor'][i]
            i_dist = dist_between_points(player_pos, i_player_pos)
            i_vel_mag = np.linalg.norm(i_dist)
            i_vel_ang = arctan2(i_player_vel[1], i_player_vel[0])
            angle = angle_between_points(player_pos, i_player_pos)
            teammates.append([
                i_player_pos[0], i_player_pos[1] / 0.42, i_dist,
                np.cos(angle),
                np.sin(angle), i_vel_mag,
                np.cos(i_vel_ang),
                np.sin(i_vel_ang), i_player_tired_factor
            ])

        teammates = np.array(teammates).flatten()
        enemy_team = np.array(enemy_team).flatten()
        curr_dist_to_goal = dist_to_goal(
            obs
        )  # Closer distance have larger variance, farther less important

        # get other information
        game_mode = [0 for _ in range(7)]
        if (type(obs['game_mode']) is GameMode):
            game_mode[obs['game_mode'].value] = 1
        else:
            game_mode[obs['game_mode']] = 1

        sticky_action = [0 for _ in range(len(sticky_action_lookup))]
        if type(obs['sticky_actions']) is set:
            for action in obs['sticky_actions']:
                sticky_action[sticky_action_lookup[action.name]] = 1
        else:
            sticky_action = obs['sticky_actions']

        active_team = obs['ball_owned_team']
        prev_team = self.constant_lookup['prev_team']
        action_vec = self.constant_lookup['intermediate_action_vec']
        possession = False  # Determine if we have possession or not
        if ((active_team == 0 and prev_team == 0)
                or (active_team == 1 and prev_team == 0)
                or (active_team == 1 and prev_team == 1)):
            # Reset if lose the ball or keep the ball on pass
            self.constant_lookup['intermediate_action_vec'] = [0, 0, 0, 0]
            possession = False
        elif (active_team == -1 and prev_team == 0
              and prev_action is not None):
            # Nobody owns right now and you had possession
            # Track prev actions
            if (type(prev_action) is Action
                    and prev_action.value in inter_action_vec_lookup):
                action_vec[inter_action_vec_lookup[prev_action.value]] = 1
            elif prev_action in inter_action_vec_lookup:
                action_vec[inter_action_vec_lookup[prev_action]] = 1
            possession = True

        if not possession and active_team == 0:
            possession = True

        if active_team != -1:
            self.constant_lookup['prev_team'] = active_team

        self.constant_lookup['possession'] = possession
        l_score, r_score = obs['score']
        prev_l_score, prev_r_score = self.constant_lookup[
            'prev_l_score'], self.constant_lookup['prev_r_score']

        l_score_change = l_score - prev_l_score
        r_score_change = r_score - prev_r_score

        scalars = [
            obs['ball'][0],
            obs['ball'][1] / 0.42,
            *obs['ball_direction'],
            obs['steps_left'] / 3000,
            *game_mode,
            curr_dist_to_goal,
            *sticky_action,  # Tracks sticky actions
            *action_vec,  # Tracks long term actions
            l_score_change,
            r_score_change,
            possession,
            active_team
        ]

        scalars = np.r_[scalars].astype(np.float32)
        action_mask = self.action_mask(obs)
        combined = np.concatenate([
            active_player.flatten(),
            teammates.flatten(),
            enemy_team.flatten(),
            scalars.flatten(),
            action_mask.flatten()
        ])
        done = False
        if obs['steps_left'] == 0:
            done = True

        reward = possession_score_reward(obs, possession, l_score_change,
                                         r_score_change, prev_action, l_score,
                                         r_score, done)

        self.constant_lookup['prev_r_score'] = r_score
        self.constant_lookup['prev_l_score'] = l_score

        return combined, (l_score, r_score, reward)