def __init__(self, env, FPS=50.0): self.env = env self.env.reset() self.car = Car(env.unwrapped.car, 10., 0.) self.track = env.unwrapped.track self.dt = 1 / FPS self.long_term_planning_length = 30 self.short_term_planning_length = 10
def initialize_agent(self): self.me = Car(self.team, self.index) self.goals = [Goal(0), Goal(1)] self.ball = Ball() if not self.boost_tracker: self.boost_tracker = BoostTracker() self.boost_tracker.initialize_all_boost(self.get_field_info()) self.actions = ActionChain()
def update_cars(self, packet: GameTickPacket): self.allies = [ Car(c.team, i, packet) for i, c in enumerate(packet.game_cars) if c.team == self.team and i != self.index ] self.enemies = [ Car(c.team, i, packet) for i, c in enumerate(packet.game_cars) if c.team != self.team ] self.me = Car(self.team, self.index, packet)
def run(self, car: Car = None, ball: Ball = None) -> SimpleControllerState: if self.finished: return SimpleControllerState() self.controls = fly_to_target(car, self.target, controls=self.controls) if car.local(self.target - car.location).length() < 100: self.finished = True if self.with_the_quickness: self.controls.boost = True return self.controls
def main(): env = gym.make('CarRacing-v0') done = False env.reset() car = env.unwrapped.car w = car.wheels[0] dt = 1 / FPS prev_a = MAX_a prev_steer = 0 total_reward = 0 for i in range(1000000): print('########################') ind = env.unwrapped.tile_visited_count ind = ind % len(env.unwrapped.track) target = env.unwrapped.track[ind][2:4] x, y = car.hull.position vx, vy = w.linearVelocity * 1 theta = atan2(vy, vx) dtheta = 0 obs = x, y, theta, vy, vx, dtheta, target long_term_N = 30 long_term_target = build_long_term_larget(env.unwrapped.track, ind, (x, y), dt, long_term_N) ego_car = Car(obs, (prev_a, prev_steer)) t = time() a, steer, x, y = long_term_MPC(ego_car, long_term_target, dt, long_term_N) print('long term solve time:', time() - t) short_term_N = 5 short_term_target = list(zip(x, y))[:short_term_N] t = time() a, steer, x, y = short_term_MPC(ego_car, short_term_target, dt, short_term_N) print('short term solve time:', time() - t) prev_a, prev_steer = a, steer print(a, steer, np.linalg.norm((vx, vy))) a = a / MAX_a steer = steer if a > 0: action = steer, a / 10, 0 else: action = steer, 0, -a _, r, done, _ = env.step(action) total_reward += r # env.render() print(total_reward)
def run(self, car: Car = None, ball: Ball = None) -> SimpleControllerState: if self.finished: return SimpleControllerState() target = ball.location + self.ball_center_offset self.controls = drive_to_target(car, target, controls=self.controls) if car.local(target - car.location).length() < 650: self.controls.jump = True self.finished = True print("Kicked off") return self.controls
def initialize_target_boost(self, car: Car): if not car.flying: if not self.max_time: self.boost, self.pad = car.get_closest_boosts( self.boost_tracker, self.in_path) if not self.boost: self.boost = self.pad else: self.boost, self.pad, times = car.get_closest_boosts( self.boost_tracker, in_current_path=self.in_path, path_angle_limit=0, return_time_to=True) # No boost reachable. Life sucks if times[0] >= self.max_time and times[1] >= self.max_time: return False if times[1] < self.max_time: self.boost = self.pad print("Boost target acquired!") self.target = Vec3(self.boost.location) return True
def run(self, car: Car = None, ball: Ball = None): # get stopping time in frames if self.frame == 0: self.frames_to_stop = car.time_to_stop(coast=False) / dt # print("parking speed: ", self.speed) if self.face_ball: self.controls = drive_to_target(car, ball.location, self.controls, speed=self.speed) else: self.controls.throttle = clamp(self.speed) self.controls.boost = False self.controls.steer = 0 self.controls.handbrake = False self.frame += 1 if self.frame >= self.frames_to_stop: self.finished = True return self.controls
def run(self, car: Car = None, ball: Ball = None) -> SimpleControllerState: if self.finished: return SimpleControllerState() if not self.boost and self.boost_tracker is not None: if not self.initialize_target_boost(car): self.finished = True # Bail if finished, no boost passed, or boost no longer active if self.finished or (not self.boost): return self.controls self.controls = drive_to_target(car, self.target.flat(), controls=self.controls) # finished if close enough, boost taken, or car got enough along the way if (car.local(self.target - car.location).length() < 100 or not self.boost.is_active) or car.boost > 99: print("Grabbed boost!") self.finished = True return self.controls
def run(self, car: Car=None, ball: Ball=None) -> SimpleControllerState: if self.finished: print("Cops busted me.... Joyride done") return self.controls distance_remaining = (self.target-car.location).length() stop_distance = car.stop_distance(not self.with_the_quickness) print("distance: ", distance_remaining, " stop distance: ", stop_distance) # close enough and nowhere to look next if distance_remaining <= stop_distance: self.finished = True self.controls = drive_to_target(car, self.target, controls=self.controls) # self.controls.throttle = 1 if self.with_the_quickness else 0 # self.controls.boost = self.with_the_quickness # print(" Finished my joyride") elif stop_distance < 300: self.finished = True self.controls.throttle = 1 if self.with_the_quickness else 0 self.controls.boost = self.with_the_quickness # print(" Finished my joyride") else: self.controls = drive_to_target(car, self.target, controls=self.controls) return self.controls
def long_term_MPC(ego_car: Car, route_xs, route_ys, dt): assert len(route_xs) == len(route_ys) N = len(route_xs) ego_car_x, ego_car_y, ego_car_v, ego_car_theta, ego_car_a, ego_car_steer = ego_car.get_car_state( ) x_init = [ego_car_x] * N y_init = [ego_car_y] * N theta_init = [ego_car_theta] * N v_init = [ego_car_v] * N steer_init = [ego_car_steer] * (N - 1) a_init = [MAX_a] * (N - 1) initial_value = x_init + y_init + theta_init + v_init + \ steer_init + a_init # vars to be optimize x = SX.sym('x', N) y = SX.sym('y', N) theta = SX.sym('theta', N) v = SX.sym('v', N) steer = SX.sym('steer', N - 1) a = SX.sym('a', N - 1) all_vars = vertcat(x, y, theta, v, steer, a) # vars upper bound ub_constrains_x = np.array([ego_car_x] + [INF] * (N - 1)) ub_constrains_y = np.array([ego_car_y] + [INF] * (N - 1)) ub_constrains_theta = np.array([ego_car_theta] + [INF] * (N - 1)) ub_constrains_v = np.array([ego_car_v] + [MAX_v] * (N - 1)) ub_constrains_steer = np.array([ego_car_steer] + [MAX_steer] * (N - 2)) ub_constrains_a = np.array([ego_car_a] + [MAX_a] * (N - 2)) ub_constrains_vars = np.hstack([ ub_constrains_x, ub_constrains_y, ub_constrains_theta, ub_constrains_v, ub_constrains_steer, ub_constrains_a ]) # vars lower bound lb_constrains_x = np.array([ego_car_x] + [-INF] * (N - 1)) lb_constrains_y = np.array([ego_car_y] + [-INF] * (N - 1)) lb_constrains_theta = np.array([ego_car_theta] + [-INF] * (N - 1)) lb_constrains_v = np.array([ego_car_v] + [-MIN_v] * (N - 1)) lb_constrains_steer = np.array([ego_car_steer] + [MIN_steer] * (N - 2)) lb_constrains_a = np.array([ego_car_a] + [MIN_a] * (N - 2)) lb_constrains_vars = np.hstack([ lb_constrains_x, lb_constrains_y, lb_constrains_theta, lb_constrains_v, lb_constrains_steer, lb_constrains_a ]) # define constrain function g (variables update equation) x_constrain = SX.sym('x_constrain', N - 1) y_constrain = SX.sym('y_constrain', N - 1) theta_constrain = SX.sym('theta_constrain', N - 1) v_constrain = SX.sym('v_constrain', N - 1) SCALE = 0.002 for i in range(N - 1): theta_diff = atan(tan(steer[i]) / 2) * v[i] * dt * SCALE # theta_diff = steer[i] * dt x_constrain[i] = x[i + 1] - (x[i] + v[i] * dt * np.cos(theta[i])) y_constrain[i] = y[i + 1] - (y[i] + v[i] * dt * np.sin(theta[i])) theta_constrain[i] = theta[i + 1] - (theta[i] - theta_diff) v_constrain[i] = v[i + 1] - (v[i] + a[i] * dt) all_constrain = vertcat(x_constrain, y_constrain, theta_constrain, v_constrain) ub_constrains_g = np.zeros([4 * (N - 1)]) lb_constrains_g = np.zeros([4 * (N - 1)]) # define cost function f cost = 0 for i in range(N): # deviation cost += 20 / N**3 * (N - i)**4 * (x[i] - route_xs[i])**2 cost += 20 / N**3 * (N - i)**4 * (y[i] - route_ys[i])**2 # control cost if i < N - 2: cost += 5 * N * steer[i]**2 cost += 0.01 * N * a[i]**2 cost += 20 * N * (steer[i + 1] - steer[i])**2 # cost += 0.1 * N * (a[i + 1] - a[i]) ** 2 nlp = {'x': all_vars, 'f': cost, 'g': all_constrain} S = nlpsol('S', 'ipopt', nlp, { "print_time": False, "ipopt": { "print_level": 0 } }) result = S(x0=initial_value, lbg=lb_constrains_g, ubg=ub_constrains_g, lbx=lb_constrains_vars, ubx=ub_constrains_vars) # def print_result(): # print('route_x', [i[0] for i in route]) # print('x', result['x'][0:N]) # print('route_y', [i[1] for i in route]) # print('y', result['x'][N:2 * N]) # # print('theta', result['x'][2 * N:3 * N]) # print('v', result['x'][3 * N:4 * N]) # print('vx', result['x'][3 * N:4 * N] * np.cos(result['x'][2 * N:3 * N])) # print('vy', result['x'][3 * N:4 * N] * np.sin(result['x'][2 * N:3 * N])) # # print('steer', result['x'][4 * N:5 * N - 1]) # print('a', result['x'][5 * N - 1:6 * N - 2]) # print_result() xs = result['x'][0:N].elements() ys = result['x'][N:2 * N].elements() steer = float(result['x'][4 * N + 1]) a = float(result['x'][5 * N]) a = a / MAX_a if a > 0: control = Control(steer, a / 10, 0) else: control = Control(steer, 0, -a) # controls = [] # for a, steer in zip(result['x'][5 * N - 1:6 * N - 2].elements(), result['x'][4 * N:5 * N - 1].elements()): # # steer = float(result['x'][4 * N + 1]) # # a = float(result['x'][5 * N]) # a = a / MAX_a # if a > 0: # controls.append(Control(steer, a / 10, 0)) # else: # controls.append(Control(steer, 0, -a)) return control, xs, ys
def short_term_MPC(ego_car: Car, route_xs, route_ys, dt, verbose=False): assert len(route_xs) == len(route_ys) N = len(route_xs) ego_car_x, ego_car_y, ego_car_v, ego_car_theta, ego_car_a, ego_car_steer = ego_car.get_car_state( ) x_init = [ego_car_x] * N y_init = [ego_car_y] * N theta_init = [ego_car_theta] * N v_init = [ego_car_v] * N steer_init = [ego_car_steer] * (N - 1) a_init = [MAX_a] * (N - 1) initial_value = x_init + y_init + theta_init + v_init + \ steer_init + a_init # vars to be optimize x = SX.sym('x', N) y = SX.sym('y', N) theta = SX.sym('theta', N) v = SX.sym('v', N) steer = SX.sym('steer', N - 1) a = SX.sym('a', N - 1) all_vars = vertcat(x, y, theta, v, steer, a) # vars upper bound ub_constrains_x = np.array([ego_car_x] + [INF] * (N - 1)) ub_constrains_y = np.array([ego_car_y] + [INF] * (N - 1)) ub_constrains_theta = np.array([ego_car_theta] + [INF] * (N - 1)) ub_constrains_v = np.array([ego_car_v] + [MAX_v] * (N - 1)) ub_constrains_steer = np.array([ego_car_steer] + [MAX_steer] * (N - 2)) ub_constrains_a = np.array([ego_car_a] + [MAX_a] * (N - 2)) ub_constrains_vars = np.hstack([ ub_constrains_x, ub_constrains_y, ub_constrains_theta, ub_constrains_v, ub_constrains_steer, ub_constrains_a ]) # vars lower bound lb_constrains_x = np.array([ego_car_x] + [-INF] * (N - 1)) lb_constrains_y = np.array([ego_car_y] + [-INF] * (N - 1)) lb_constrains_theta = np.array([ego_car_theta] + [-INF] * (N - 1)) lb_constrains_v = np.array([ego_car_v] + [-MIN_v] * (N - 1)) lb_constrains_steer = np.array([ego_car_steer] + [MIN_steer] * (N - 2)) lb_constrains_a = np.array([ego_car_a] + [MIN_a] * (N - 2)) lb_constrains_vars = np.hstack([ lb_constrains_x, lb_constrains_y, lb_constrains_theta, lb_constrains_v, lb_constrains_steer, lb_constrains_a ]) # define constrain function g (variables update equation) x_constrain = SX.sym('x_constrain', N - 1) y_constrain = SX.sym('y_constrain', N - 1) theta_constrain = SX.sym('theta_constrain', N - 1) v_constrain = SX.sym('v_constrain', N - 1) SCALE = 0.002 for i in range(N - 1): # theta_diff = atan(tan(steer[i]) / 2) * v[i] * dt * SCALE theta_diff = steer[i] * v[i] * dt * SCALE x_constrain[i] = x[i + 1] - (x[i] + v[i] * dt * np.cos(theta[i])) y_constrain[i] = y[i + 1] - (y[i] + v[i] * dt * np.sin(theta[i])) theta_constrain[i] = theta[i + 1] - (theta[i] - theta_diff) v_constrain[i] = v[i + 1] - (v[i] + a[i] * dt) all_constrain = vertcat(x_constrain, y_constrain, theta_constrain, v_constrain) ub_constrains_g = np.zeros([4 * (N - 1)]) lb_constrains_g = np.zeros([4 * (N - 1)]) # define cost function f cost = 0 for i in range(N): # deviation cost += 20 * N * (x[i] - route_xs[i])**2 cost += 20 * N * (y[i] - route_ys[i])**2 if i < N - 2: cost += 1 * N * steer[i]**2 # cost += 0.01 * N * a[i] ** 2 cost += 50 * N * (steer[i + 1] - steer[i])**2 nlp = {'x': all_vars, 'f': cost, 'g': all_constrain} S = nlpsol('S', 'ipopt', nlp, { "print_time": False, "ipopt": { "print_level": 0 } }) result = S(x0=initial_value, lbg=lb_constrains_g, ubg=ub_constrains_g, lbx=lb_constrains_vars, ubx=ub_constrains_vars) steer = float(result['x'][4 * N + 1]) a = float(result['x'][5 * N]) a = a / MAX_a if a > 0: action = Control(steer, a / 10, 0) else: action = Control(steer, 0, -a) if verbose: xs = result['x'][0:N].elements() ys = result['x'][N:2 * N].elements() pprint(xs) pprint(ys) return action, None, None
def get_steer(car: Car, target: Vec3): local_target = car.local(target) local_target_norm = local_target.normalized() turn_angle = math.atan2(local_target_norm.y, local_target_norm.x) return turn_angle
class Environment: def __init__(self, env, FPS=50.0): self.env = env self.env.reset() self.car = Car(env.unwrapped.car, 10., 0.) self.track = env.unwrapped.track self.dt = 1 / FPS self.long_term_planning_length = 30 self.short_term_planning_length = 10 def render(self): self.env.render() def observe(self): x, y = self.car.get_position() vx, vy = self.car.get_wheel().linearVelocity * 1 theta = atan2(vy, vx) return x, y, vx, vy, theta def step(self, control: Control): obs = self.observe() _, reward, done, info = self.env.step(control.to_tuple()) self.car.take_control(control) return obs, reward, done, info def seed(self, seed_val): self.env.seed(seed_val) def reset(self): state = self.env.reset() self.car = Car(self.env.unwrapped.car, 10, 0) return state def get_car(self): return self.car def get_current_waypoint_index(self): ind = self.env.unwrapped.tile_visited_count ind = ind % len(self.get_track()) return ind def get_track(self): return self.env.unwrapped.track def calc_long_term_targets(self): ind = self.get_current_waypoint_index() track = self.get_track() pos = self.car.get_position() desired_v = 60 dist_travel = desired_v * self.dt def get_point(start, end, d_to_go): x0, y0 = start x1, y1 = end dy = y1 - y0 dx = x1 - x0 d = np.linalg.norm((dx, dy)) x = x0 + d_to_go * dx / d y = y0 + d_to_go * dy / d return x, y cur_pos = np.array(pos) cur_target = np.array(track[ind][2:4]) # result = [pos] xs, ys = [pos[0]], [pos[1]] for i in range(self.long_term_planning_length - 1): remain_dist = np.linalg.norm(cur_target - cur_pos) - dist_travel if remain_dist > 0: p = get_point(cur_pos, cur_target, dist_travel) # result.append(p) xs.append(p[0]) ys.append(p[1]) cur_pos = p else: # must ensure distance between 2 target points larger than dist_travel cur_pos = cur_target ind = (ind + 1) % len(track) cur_target = np.array(track[ind][2:4]) p = get_point(cur_pos, cur_target, -remain_dist) xs.append(p[0]) ys.append(p[1]) cur_pos = p return xs, ys
def reset(self): state = self.env.reset() self.car = Car(self.env.unwrapped.car, 10, 0) return state
class Dingus(BaseAgent): def __init__(self, name, team, index): super().__init__(name, team, index) self.allies: list[Car] = [] self.enemies: list[Car] = [] self.me: Car = None self.goals: list[Goal] = None self.ball: Ball = None self.actions: ActionChain = None self.game_info: GameInfo = None self.boost_tracker: BoostTracker = None self.ready_for_kickoff: bool = False self.ball_prediction = None # self.state = "parked" self.just_got_boost = False self.debug_targets = [] def initialize_agent(self): self.me = Car(self.team, self.index) self.goals = [Goal(0), Goal(1)] self.ball = Ball() if not self.boost_tracker: self.boost_tracker = BoostTracker() self.boost_tracker.initialize_all_boost(self.get_field_info()) self.actions = ActionChain() # Taken from GoslingUtils def debug_actions(self, only_current=True): white_color = self.renderer.white() blue_color = self.renderer.blue() red_color = self.renderer.red() yellow_color = self.renderer.yellow() names = self.actions.get_chain_names() self.renderer.draw_string_2d(10, 50, 3, 3, self.actions.last_state, blue_color) if len(names) > 0: for i in range(len(names)): self.renderer.draw_string_2d(10, 50 + (50 * (len(names) - i)), 3, 3, names[i], white_color) if hasattr( self.actions.action_list[i], "target" ) and self.actions.action_list[i].target is not None: target_end = Vec3( self.actions.action_list[i].target.x, self.actions.action_list[i].target.y, self.actions.action_list[i].target.z + 300) if i == 0: self.renderer.draw_line_3d( self.actions.action_list[i].target, target_end, yellow_color) else: self.renderer.draw_line_3d( self.actions.action_list[i].target, target_end, red_color) def line(self, start, end, color=None, alpha=255): color = color if color is not None else [255, 255, 255] self.renderer.draw_line_3d(Vec3(start), Vec3(end), self.renderer.create_color(alpha, *color)) def is_kickoff(self): ret = self.game_info.is_kickoff_pause and self.game_info.is_round_active if ret: self.ready_for_kickoff = True return ret def reset_for_kickoff(self): self.actions = ActionChain(action_list=[BaseKickoff()]) self.debug_targets = [] # self.state = "kickoff" def update_cars(self, packet: GameTickPacket): self.allies = [ Car(c.team, i, packet) for i, c in enumerate(packet.game_cars) if c.team == self.team and i != self.index ] self.enemies = [ Car(c.team, i, packet) for i, c in enumerate(packet.game_cars) if c.team != self.team ] self.me = Car(self.team, self.index, packet) def preprocess(self, packet: GameTickPacket): if packet.num_cars != len(self.allies) + len(self.enemies) + 1: self.update_cars(packet) [c.update(packet) for c in self.allies] [c.update(packet) for c in self.enemies] self.me.update(packet) self.ball.update(packet) self.game_info = packet.game_info self.boost_tracker.update(packet) # def increment_state(self): # if len(self.future_actions) < 1: # self.current_action = None # elif self.current_action.finished: # self.current_action = self.future_actions[0] # del self.future_actions[0] # else: # self.current_action = None # self.future_actions = [] def get_output(self, packet: GameTickPacket) -> SimpleControllerState: # Preliminary things to do self.preprocess(packet) self.renderer.begin_rendering() self.debug_actions(False) out: SimpleControllerState = SimpleControllerState() # self.ball_prediction = self.get_ball_prediction_struct() # print(self.actions) # Draw some shit self.line(self.me.location, self.ball.location, [0, 255, 0]) # self.state = self.actions.last_state # No current plans """ If enemy onside, go back post and wait if not doing either yet if enemy offside, grab boost if you need it, otherwise ball chase. boost if the enemy is closer to the ball Should break routine to go back post if enemy goes onside """ enemy_onside = self.enemies[0].onside(self.ball.location, 200) # Maybe get fancy by checking if this is true in 2 seconds???? back_post = self.goals[self.team].get_back_post_rotation( self.ball.location) back_boost = self.boost_tracker.get_back_boost(self.me.side, -self.ball.side) closer_to_ball = self.me.local( self.ball.location - self.me.location).length() < self.enemies[0].local( self.ball.location - self.enemies[0].location).length() print("Dingus closer to ball: ", closer_to_ball) if not self.actions.busy: if not enemy_onside or self.actions.last_state != "defending": if self.me.boost < 25 and self.actions.last_state != "grabbing boost": self.actions.append( BoostGrab(boost=None, boost_tracker=self.boost_tracker)) else: self.actions.append( Ballchase(self.ball.last_touch_time, with_the_quickness=not closer_to_ball)) # print(self.state == "going to defend") elif enemy_onside: # if going to defend, this has already been called. if defending, already backpost print("actions.state: ", self.actions.last_state) print("action.state != defending: ", self.actions.last_state != "defending") print("action.state != going to defend: ", self.actions.last_state != "going to defend") if self.actions.last_state != "going to defend" and self.actions.last_state != "defending": print("Adding another joyride???") self.go_back_post(boost_first=True) # check for kickoff reset right before running actions if self.is_kickoff() and self.ready_for_kickoff: self.reset_for_kickoff() if self.actions.busy: print("doing action: ", self.actions.current.__class__.__name__) if hasattr(self.actions.current, "target") and self.actions.current.target is not None: self.line(self.me.location, self.actions.current.target, [255, 0, 0]) out = self.actions.execute(packet, car=self.me, ball=self.ball) # final things to do self.renderer.end_rendering() return out def make_a_plan(self): pass def go_back_post(self, boost_first=True): back_post = self.goals[self.team].get_back_post_rotation( self.ball.location) back_post_sign = self.goals[self.team].get_back_post_sign( self.ball.location) back_boost = self.boost_tracker.get_back_boost(self.me.side, back_post_sign) back_boost_prep_target = Vec3( back_boost.location.x + (back_post_sign * 250), back_boost.location.y - (self.me.side * 250), 0) self.actions.interrupt_now() if boost_first: self.actions.append( Joyride("going to defend", target=back_boost_prep_target, with_the_quickness=False)) self.actions.append( BoostGrab(boost=back_boost, state="going to defend")) self.actions.append(Joyride("going to defend", target=back_post)) self.actions.append( Park("defending", face_ball=False, apply_break=False))
def liftoff_angles(car: Car, target:Vec3): local_target = car.local(target) yaw = math.atan2(local_target.y, local_target.x) pitch = math.atan2(local_target.x, local_target.z) return yaw, pitch