def pose_control(self, robot):
    """Drive the robot toward a look-ahead goal pose.

    Computes a forward-velocity / angular-rate command pair from the
    distance and bearing to a look-ahead point on the path.

    Args:
        robot: object exposing ``states`` (state column vector, heading at
            ``states[2][0]``) and a writable ``goal`` attribute.

    Returns:
        np.array([v, w]): proportional forward velocity and the angular
        command combining bearing, goal-orientation and derivative terms.

    Side effects: overwrites ``robot.goal`` with the look-ahead pose and
    updates ``self.error_old`` for the next derivative step.
    """

    def _wrap(angle):
        # Normalize an angle into the interval (-pi, pi].
        return (angle + math.pi) % (2 * math.pi) - math.pi

    robot.goal = self.look_ahead(robot)
    heading = robot.states[2][0]

    # Proportional speed on the straight-line distance to the goal.
    v_cmd = self.gains[0] * utils.euclidean_distance(robot.states, robot.goal)

    # Bearing of the goal point measured from the current heading.
    alpha = _wrap(utils.angle_between_points(robot.states, robot.goal) - heading)
    # Remaining goal-orientation error beyond the bearing term.
    beta = _wrap(robot.goal[2] - heading - alpha)

    # Discrete derivative of the bearing error (damping term).
    d_alpha = alpha - self.error_old
    self.error_old = alpha

    # Goal lies behind the robot: drive in reverse rather than spin around.
    if abs(alpha) > math.pi / 2:
        v_cmd = -v_cmd

    w_cmd = (self.gains[2] * alpha
             + self.gains[3] * beta
             + self.gains[1] * d_alpha)
    return np.array([v_cmd, w_cmd])
def error(self):
    """Return the pose-tracking error vector.

    Returns:
        np.ndarray of shape (4,):
            [0] x error (state x minus goal x)
            [1] y error (state y minus goal y)
            [2] negated bearing error to the goal, wrapped to (-pi, pi]
            [3] current velocity state ``self.states[3, 0]``

    Fix: the original also computed ``beta`` (goal-orientation error) and
    never used it — dead code removed. Indexing unified to ``[i, 0]``
    (the original mixed ``states[2][0]`` and ``states[2, 0]``).
    """
    # Bearing of the goal relative to the current heading, wrapped to (-pi, pi].
    angle_to_point = (utils.angle_between_points(self.states, self.goal)
                      - self.states[2, 0] + math.pi) % (2.0 * math.pi) - math.pi
    return np.array([
        self.states[0, 0] - self.goal[0],
        self.states[1, 0] - self.goal[1],
        -angle_to_point,
        self.states[3, 0],
    ])
def parse(self, obs, prev_action=None):
    """Flatten a football-environment observation into a feature vector.

    Args:
        obs: raw observation dict from the environment (keys used:
            'active', 'left_team*', 'right_team*', 'ball*', 'game_mode',
            'sticky_actions', 'ball_owned_team', 'score', 'steps_left').
        prev_action: action taken on the previous step (``Action`` enum or
            int); used to remember long-term intents while the ball is loose.

    Returns:
        ``(combined, (l_score, r_score, reward))`` where ``combined``
        concatenates active-player, teammate, opponent, scalar and
        action-mask features.

    Side effects: updates bookkeeping in ``self.constant_lookup``
    (prev_team, possession, intermediate_action_vec, prev scores).

    Fixes vs. original:
      * opponent rows were appended to ``teammates`` (copy-paste bug),
        leaving ``enemy_team`` empty — they now go to the correct list;
      * the per-player "velocity magnitude" feature was
        ``np.linalg.norm(i_dist)`` (norm of the scalar distance, i.e. a
        duplicate of the distance feature) — it now uses the velocity
        vector, matching the feature's name and the angle feature beside it.
    """

    def _player_row(ref_pos, pos, vel, tired):
        # Features of one player relative to the active player's position.
        d = dist_between_points(ref_pos, pos)
        speed = np.linalg.norm(vel)  # magnitude of the velocity vector
        vel_ang = arctan2(vel[1], vel[0])
        ang = angle_between_points(ref_pos, pos)
        return [
            pos[0],
            pos[1] / 0.42,  # normalize y by pitch half-height
            d,
            np.cos(ang), np.sin(ang),
            speed,
            np.cos(vel_ang), np.sin(vel_ang),
            tired,
        ]

    active_index = obs['active']
    player_pos = obs['left_team'][active_index]
    player_vel = obs['left_team_direction'][active_index]
    player_tired_factor = obs['left_team_tired_factor'][active_index]
    active_player = np.array([
        player_pos[0], player_pos[1] / 0.42, *player_vel, player_tired_factor
    ])

    # Same feature layout for both teams, relative to the active player.
    teammates = [
        _player_row(player_pos, obs['left_team'][i],
                    obs['left_team_direction'][i],
                    obs['left_team_tired_factor'][i])
        for i in range(len(obs['left_team']))
    ]
    enemy_team = [
        _player_row(player_pos, obs['right_team'][i],
                    obs['right_team_direction'][i],
                    obs['right_team_tired_factor'][i])
        for i in range(len(obs['right_team']))
    ]
    teammates = np.array(teammates).flatten()
    enemy_team = np.array(enemy_team).flatten()

    # Closer distances have larger variance; farther ones matter less.
    curr_dist_to_goal = dist_to_goal(obs)

    # One-hot game mode; obs may carry either the enum or its int value.
    game_mode = [0 for _ in range(7)]
    if isinstance(obs['game_mode'], GameMode):
        game_mode[obs['game_mode'].value] = 1
    else:
        game_mode[obs['game_mode']] = 1

    # Sticky actions arrive either as a set of Action enums or as an
    # already-encoded binary vector.
    sticky_action = [0 for _ in range(len(sticky_action_lookup))]
    if isinstance(obs['sticky_actions'], set):
        for action in obs['sticky_actions']:
            sticky_action[sticky_action_lookup[action.name]] = 1
    else:
        sticky_action = obs['sticky_actions']

    active_team = obs['ball_owned_team']
    prev_team = self.constant_lookup['prev_team']
    action_vec = self.constant_lookup['intermediate_action_vec']
    possession = False
    # Determine if we have possession or not.
    if ((active_team == 0 and prev_team == 0)
            or (active_team == 1 and prev_team == 0)
            or (active_team == 1 and prev_team == 1)):
        # Reset if we lose the ball or keep the ball on a pass.
        self.constant_lookup['intermediate_action_vec'] = [0, 0, 0, 0]
        possession = False
    elif active_team == -1 and prev_team == 0 and prev_action is not None:
        # Ball is loose but we had it last: remember what we did with it.
        if (isinstance(prev_action, Action)
                and prev_action.value in inter_action_vec_lookup):
            action_vec[inter_action_vec_lookup[prev_action.value]] = 1
        elif prev_action in inter_action_vec_lookup:
            action_vec[inter_action_vec_lookup[prev_action]] = 1
        possession = True
    if not possession and active_team == 0:
        possession = True
    if active_team != -1:
        self.constant_lookup['prev_team'] = active_team
    self.constant_lookup['possession'] = possession

    l_score, r_score = obs['score']
    prev_l_score = self.constant_lookup['prev_l_score']
    prev_r_score = self.constant_lookup['prev_r_score']
    l_score_change = l_score - prev_l_score
    r_score_change = r_score - prev_r_score

    scalars = [
        obs['ball'][0],
        obs['ball'][1] / 0.42,
        *obs['ball_direction'],
        obs['steps_left'] / 3000,
        *game_mode,
        curr_dist_to_goal,
        *sticky_action,  # tracks sticky actions
        *action_vec,     # tracks long-term actions
        l_score_change,
        r_score_change,
        possession,
        active_team,
    ]
    scalars = np.r_[scalars].astype(np.float32)
    action_mask = self.action_mask(obs)

    combined = np.concatenate([
        active_player.flatten(),
        teammates.flatten(),
        enemy_team.flatten(),
        scalars.flatten(),
        action_mask.flatten(),
    ])

    done = obs['steps_left'] == 0
    reward = possession_score_reward(obs, possession, l_score_change,
                                     r_score_change, prev_action, l_score,
                                     r_score, done)
    self.constant_lookup['prev_r_score'] = r_score
    self.constant_lookup['prev_l_score'] = l_score
    return combined, (l_score, r_score, reward)