def calculate_score_for_leaderboard():
    """
    Evaluate the performance of the driving pipeline.

    This is the function used for the final ranking on the course-wide
    leader-board, only with a different set of seeds. The evaluation
    protocol (hyper-parameters, episode length, reward clipping) should
    therefore not be changed.

    One episode is run per entry in ``seeds``. Every episode lasts exactly
    600 steps; the ``done`` flag returned by the environment is ignored.
    The per-episode reward is floored at 0 before being accumulated, and
    the mean over all episodes is printed as the total score.

    The function returns nothing; results are reported via ``print``.
    """
    # action variables: a[0] is set by the lateral controller, a[1] and a[2]
    # by the longitudinal controller.
    # NOTE: the vector is created once and is not reset between episodes, so
    # the first step of an episode re-applies the last action of the previous
    # one. This is part of the evaluation protocol and is kept as-is.
    a = np.array([0.0, 0.0, 0.0])

    # init environment
    env = CarRacing()
    env.render()
    env.reset()

    seeds = [
        22597174, 68545857, 75568192, 91140053, 86018367,
        49636746, 66759182, 91294619, 84274995, 31531469,
    ]

    total_reward = 0

    # Iterate over the seed list itself (instead of a hard-coded range(10)) so
    # that swapping in a seed set of a different length keeps working.
    for episode, seed in enumerate(seeds):
        env.seed(seed)
        env.reset()

        # init modules of the pipeline (fresh controllers for every episode)
        LD_module = LaneDetection(gradient_threshold=25, spline_smoothness=20)
        LatC_module = LateralController(gain_constant=1.8, damping_constant=0.05)
        LongC_module = LongitudinalController(KD=0.001)

        reward_per_episode = 0
        for _ in range(600):
            # perform step with the action computed in the previous iteration
            s, r, _done, speed, _info = env.step(a)

            # lane detection
            lane1, lane2 = LD_module.lane_detection(s)

            # waypoint and target_speed prediction
            waypoints = waypoint_prediction(lane1, lane2)
            target_speed = target_speed_prediction(waypoints, max_speed=60, exp_constant=6)

            # control
            a[0] = LatC_module.stanley(waypoints, speed)
            a[1], a[2] = LongC_module.control(speed, target_speed)

            # reward
            reward_per_episode += r
            env.render()

        print('episode %d \t reward %f' % (episode, reward_per_episode))
        # Negative episode rewards count as 0. np.inf replaces the np.infty
        # alias, which was removed in NumPy 2.0.
        total_reward += np.clip(reward_per_episode, 0, np.inf)

    print('---------------------------')
    # Average over the number of episodes actually run.
    print(' total score: %f' % (total_reward / len(seeds)))
    print('---------------------------')
# define variables total_reward = 0.0 steps = 0 restart = False # init modules of the pipeline LD_module = LaneDetection() LatC_module = LateralController() LongC_module = LongitudinalController() # init extra plot fig = plt.figure() plt.ion() plt.show() env.seed(91294619) while True: # perform step s, r, done, speed, info = env.step(a) # lane detection lane1, lane2 = LD_module.lane_detection(s) # waypoint and target_speed prediction waypoints = waypoint_prediction(lane1, lane2) target_speed = target_speed_prediction(waypoints, max_speed=60, exp_constant=4.5) # control a[0] = LatC_module.stanley(waypoints, speed)
if k == key.DOWN: a[2] = +0.8 # set 1.0 for wheels to block to zero rotation def key_release(k, mod): if k == key.LEFT and a[0] == -1.0: a[0] = 0 if k == key.RIGHT and a[0] == +1.0: a[0] = 0 if k == key.UP: a[1] = 0 if k == key.DOWN: a[2] = 0 env = CarRacing() env.render() env.viewer.window.on_key_press = key_press env.viewer.window.on_key_release = key_release while True: env.seed(seed=5) env.reset() total_reward = 0.0 steps = 0 restart = False env.render() while True: s, r, done, info = env.step(a) total_reward += r if steps % 200 == 0 or done: # print("\naction " + str(["{:+0.2f}".format(x) for x in a])) # print("step {} total_reward {:+0.2f}".format(steps, total_reward)) pass steps += 1 env.render() obs = state_to_1_batch_tensor(s)
env.reset() # define variables total_reward = 0.0 steps = 0 restart = False # init modules of the pipeline LD_module = LaneDetection() LatC_module = LateralController() # init extra plot fig = plt.figure() plt.ion() plt.show() env.seed(68545857) while True: # perform step s, r, done, speed, info = env.step(a) # lane detection lane1, lane2 = LD_module.lane_detection(s) # waypoint and target_speed prediction waypoints = waypoint_prediction(lane1, lane2) target_speed = target_speed_prediction(waypoints) # control with constant gas and no braking a[0] = LatC_module.stanley(waypoints, speed)