def reset(self):
    """Begin a new episode and return the observation after the warm-up frames.

    Destroys the previous episode's bodies, clears the per-episode counters,
    regenerates a track until ``_create_track`` succeeds, places a fresh car
    on the first track tile and then advances the simulation 24 times with
    no action.

    Returns:
        The observation (element 0 of ``step``'s result) of the last
        warm-up step.
    """
    self._destroy()

    # Per-episode bookkeeping.
    self.reward = self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []
    self.state = np.zeros(self.observation_space.shape)
    self._last_rewards = []

    # Track generation can fail; keep trying until it reports success.
    while not self._create_track():
        if self.verbose == 1:
            print(
                "retry to generate track (normal if there are not many instances of this message)"
            )

    start_pose = self.track[0][1:4]
    self.car = Car(self.world, *start_pose)

    # There are 20 frames of noise at the beginning (+ 4 frames per state),
    # so 24 no-action steps are taken and only the last observation is kept.
    for _ in range(23):
        self.step(None)
    return self.step(None)[0]
def reset(
    self,
    *,
    seed: Optional[int] = None,
    return_info: bool = False,
    options: Optional[dict] = None,
):
    """Begin a new episode on a freshly generated track.

    Args:
        seed: Forwarded to the parent class's ``reset``.
        return_info: When true, an empty info dict is returned alongside
            the observation.
        options: Accepted for interface compatibility; not read here.

    Returns:
        The first observation (element 0 of ``step(None)``), or the tuple
        ``(observation, {})`` when ``return_info`` is true.
    """
    super().reset(seed=seed)
    self._destroy()
    self.reward = 0.0
    self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.new_lap = False
    self.road_poly = []

    # Track generation can fail; retry until it reports success.
    while not self._create_track():
        if self.verbose == 1:
            # The two literals are concatenated by the parser, so the first
            # one needs a trailing space (it used to print "manyinstances").
            print("retry to generate track (normal if there are not many "
                  "instances of this message)")

    self.car = Car(self.world, *self.track[0][1:4])

    observation = self.step(None)[0]
    if return_info:
        return observation, {}
    return observation
def reset(self):
    """Begin a new episode on a fresh track, optionally with obstacles.

    The track is regenerated until ``_create_track`` succeeds and, when
    ``self.obstacles`` is truthy, ``_create_obstacles`` succeeds as well.
    Obstacles are only attempted after a successful track.

    Returns:
        The first observation (element 0 of ``step(None)``).
    """
    self._destroy()
    self.reward = 0.0
    self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []
    self.obstacle_poly = []
    self.steps = 0

    while True:
        # Short-circuiting keeps the original call order: obstacles are
        # created only when a track exists and obstacles are enabled.
        if self._create_track() and (not self.obstacles
                                     or self._create_obstacles()):
            break
        if self.verbose == 1:
            # The two literals are concatenated by the parser, so the first
            # one needs a trailing space (it used to print "manyinstances").
            print("retry to generate track (normal if there are not many "
                  "instances of this message)")

    self.car = Car(self.world, *self.track[0][1:4])
    return self.step(None)[0]
def reset(self):
    """Begin a new episode, optionally starting from a random track tile.

    When ``self.random`` is truthy the start tile index is drawn with
    ``randint(110, 250)``; otherwise tile 5 is used.

    Returns:
        The first observation (element 0 of ``step(None)``).
    """
    self._destroy()
    self.reward = self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []

    # Track generation can fail; retry until it reports success.
    while not self._create_track():
        if self.verbose == 1:
            print(
                "retry to generate track (normal if there are not many of this messages)"
            )

    # (start tile, slice end) for the two start modes.
    # NOTE(review): no bound check against len(self.track) is made for the
    # random index - confirm generated tracks are always long enough.
    startpos, ind = (randint(110, 250), 20) if self.random else (5, 4)

    start_pose = self.track[startpos][1:ind]
    self.car = Car(self.world, *start_pose)
    return self.step(None)[0]
def reset(self):
    """Begin a new episode, reusing the current track when allowed.

    Clears the per-episode counters and frame buffers, then either
    generates a new track (when ``track_use`` has reached
    ``repeat_track * episodes_per_track``) or rebuilds the tiles of the
    current one, places a new car on the selected start tile and takes one
    no-action step.

    Returns:
        The first observation (element 0 of ``step(None)``).
    """
    # Per-episode counters and flags.
    self.ep_return = 0.0
    self.newtile = False
    self.tile_visited_count = 0
    self.last_touch_with_track = 0
    self.last_new_tile = 0
    self.obst_contact = False
    self.obst_contact_count = 0
    self.obst_contact_list = []
    self.t = 0.0
    self.steps_in_episode = 0
    # Zero-filled observation and internal frame buffers.
    self.state = np.zeros(self.observation_space.shape)
    self.internal_frames = self.skip_frames * (self.frames_per_state - 1) + 1
    self.int_state = np.zeros([STATE_H, STATE_W, self.internal_frames])

    if self.track_use >= self.repeat_track * self.episodes_per_track:
        # The current track has been used often enough: generate a new one,
        # giving up after 21 attempts.
        intento = 0
        while intento < 21:
            success = self._create_track()
            intento += 1
            if success:
                self.track_use = 0
                # Evenly spaced start tiles, one per episode on this track.
                self.episode_start = range(
                    0, len(self.track),
                    int(len(self.track) / self.episodes_per_track))
                #print(self.episode_start)
                break
            if self.verbose > 0:
                print(
                    intento,
                    " retry to generate new track (normal below 10, limit 20)"
                )
    else:
        # NOTE(review): assumed to pair with the `if` above (reuse the
        # existing track), not with the `while` - confirm against the
        # original layout.
        self._create_tiles(self.track, self.border)

    start_tile = self.episode_start[self.track_use % self.episodes_per_track]
    #print(start_tile, self.track_use, self.episodes_per_track)

    # Remove the previous car before creating the new one.
    if self.car is not None:
        self.car.destroy()
    if self.episodes_per_track > 1:
        self.car = Car(self.world, *self.track[start_tile][1:4])
    else:
        self.car = Car(self.world, *self.track[0][1:4])

    # Trying to detect two very close reset() calls: a track use is only
    # counted when more than two actions were taken since the last count.
    # NOTE(review): `action_taken = 0` is assumed to sit inside this `if` -
    # confirm against the original layout.
    if self.action_taken > 2:
        self.track_use += 1
        self.action_taken = 0
    #self.track_use += 1

    return self.step(None)[0]
def run(self, env, model, img_resize=None, random_start=False):
    """Roll out ``model`` in ``env`` for one episode and record the trajectory.

    Args:
        env: Environment exposing ``reset()`` and a 4-tuple ``step(action)``.
            With ``random_start`` it must also expose ``track`` and an inner
            ``env`` holding ``car``, ``world`` and ``track``.
        model: Callable mapping a float tensor of shape ``(1, 3, H, W)`` to a
            tensor whose first row is the action.
        img_resize: Optional ``dsize`` passed to ``cv2.resize``. When given,
            each frame is cropped to its first 84 rows and resized before
            being fed to the model. When omitted the raw frame is used.
        random_start: When true, the car is moved to a random track tile
            right after ``reset``.

    Returns:
        ``(obs, actions, rewards)`` lists, one entry per step. ``obs`` holds
        the (possibly cropped and resized) frames that were fed to the model.
    """
    obs = []
    actions = []
    rewards = []

    ob = env.reset()
    if random_start:
        # CarRacing random track tile start.
        position = np.random.randint(len(env.track))
        # Remove the car created by reset() so its bodies do not stay in
        # the physics world.
        env.env.car.destroy()
        env.env.car = Car(env.env.world, *env.env.track[position][1:4])

    done = False
    while not done:
        if img_resize:
            ob = ob[0:84, :, :]
            ob = cv2.resize(ob,
                            dsize=img_resize,
                            interpolation=cv2.INTER_CUBIC)
        # Take the layout from the frame itself: this also works without
        # img_resize and keeps rows/columns in the right order for
        # non-square sizes.
        ob_model = torch.tensor(ob / 255).view(1, *ob.shape).permute(
            0, 3, 1, 2).type('torch.FloatTensor')
        action = model(ob_model.to(self.device)).detach().cpu().numpy()[0]

        obs.append(ob)
        actions.append(action)
        ob, r, done, _ = env.step(action)
        rewards.append(r)

    return obs, actions, rewards
def simulate_batch(batch_num):
    """Collect normalised observations from random-start rollouts and save them.

    Runs ``_BATCH_SIZE`` episodes of ``_TIME_STEPS`` steps each, moving the
    car to a random track tile after every reset, and writes all collected
    observations to ``../data/obs_data_VAE_<batch_num>`` with ``np.save``.

    Args:
        batch_num: Identifier used in the output file name and log message.
    """
    env = CarRacing()
    obs_data = []
    action = env.action_space.sample()

    for _ in range(_BATCH_SIZE):
        # The reset observation is not recorded; only stepped frames are.
        env.reset()

        # Little hack to make the Car start at random positions in the
        # race-track. The car created by reset() is destroyed first so its
        # bodies do not accumulate in the physics world.
        position = np.random.randint(len(env.track))
        env.car.destroy()
        env.car = Car(env.world, *env.track[position][1:4])

        for _ in range(_TIME_STEPS):
            if _RENDER:
                env.render()
            action = generate_action(action)
            observation, reward, done, info = env.step(action)
            obs_data.append(normalize_observation(observation))

    print("Saving dataset for batch {}".format(batch_num))
    np.save('../data/obs_data_VAE_{}'.format(batch_num), obs_data)
    env.close()
def reset(self):
    """Begin a new episode on a freshly generated track.

    Returns:
        The first observation (element 0 of ``step(None)``).
    """
    self._destroy()

    # Per-episode bookkeeping.
    self.reward = self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []
    self.human_render = False

    # Track generation can fail; silently retry until it succeeds.
    while not self._create_track():
        pass

    start_pose = self.track[0][1:4]
    self.car = Car(self.world, *start_pose)
    return self.step(None)[0]
def play(params, render=True, verbose=False):
    """Evaluate ``params`` over 12 random-start trials of 999 steps each.

    Uses the module-level ``env``. After each reset the car is moved to a
    random track tile. The summed reward over all trials is clipped from
    below at ``-100 * _NUM_TRIALS``.

    Args:
        params: Passed to ``decide_action`` together with the observation.
        render: When true, ``env.render()`` is called before every step.
        verbose: When true, the action and running reward are printed every
            200 steps.

    Returns:
        The negated mean reward per trial.
    """
    _NUM_TRIALS = 12
    _STEPS_PER_TRIAL = 999

    agent_reward = 0
    for trial in range(_NUM_TRIALS):
        observation = env.reset()

        # Little hack to make the Car start at random positions in the
        # race-track. The car created by reset() is destroyed first so its
        # bodies do not accumulate in the physics world.
        np.random.seed(int(str(time.time() * 1000000)[10:13]))
        position = np.random.randint(len(env.track))
        env.car.destroy()
        env.car = Car(env.world, *env.track[position][1:4])

        total_reward = 0.0
        # NB: done is not True after 1000 steps when using the hack above
        # for random init of position, so the trial length is fixed here.
        for steps in range(_STEPS_PER_TRIAL):
            if render:
                env.render()
            action = decide_action(observation, params)
            observation, r, done, info = env.step(action)
            total_reward += r
            # The loop ends before steps reaches 999, so the former
            # `steps == 999` alternative could never fire.
            if verbose and steps % 200 == 0:
                print("\naction " + str(["{:+0.2f}".format(x) for x in action]))
                print("step {} total_reward {:+0.2f}".format(steps, total_reward))

        agent_reward += total_reward

    # If reward is out of scale, clip it.
    agent_reward = np.maximum(-(100 * _NUM_TRIALS), agent_reward)
    return -(agent_reward / _NUM_TRIALS)
def simulate_batch(batch_num):
    """Collect normalised observations from random-start rollouts and save them.

    Runs ``batch_size`` episodes of ``steps`` steps each, moving the car to a
    random track tile after every reset, and writes all collected
    observations to ``data/TR_data_<batch_num>`` with ``np.save``.

    Args:
        batch_num: Identifier used in the output file name and log message.
    """
    car_env = CarRacing()
    obs_data = []
    action = car_env.action_space.sample()

    for _ in range(batch_size):
        # The reset observation is not recorded; only stepped frames are.
        car_env.reset()

        # Make the car start at a random position. The car created by
        # reset() is destroyed first so its bodies do not accumulate in the
        # physics world.
        position = np.random.randint(len(car_env.track))
        car_env.car.destroy()
        car_env.car = Car(car_env.world, *car_env.track[position][1:4])

        # Time steps.
        for _ in range(steps):
            if render:
                car_env.render()
            action = create_action(action)
            en_observ, reward, done, info = car_env.step(action)
            obs_data.append(norm_obse(en_observ))

    print("Saving dataset for batch {}".format(batch_num))
    np.save('data/TR_data_{}'.format(batch_num), obs_data)
    car_env.close()
def multiple_runs(on):
    """Record processed frames and actions over several runs and save them.

    For each of ``MAX_RUNS`` runs the environment is reset and stepped
    ``MAX_GAME_TIME`` times with actions from ``generate_action``. The car
    is periodically moved to a random track tile (see ``REST_NUM``). The
    data is written to ``<dst>/rollout_v2_<on>.npz`` with arrays ``action``
    (float16) and ``state`` (uint8).

    Args:
        on: Identifier used in the output file name.
    """
    env = CarRacing()
    states = []
    actions = []

    for run in range(MAX_RUNS):
        state = env.reset()
        counter = 0
        for game_time in range(MAX_GAME_TIME):
            action = generate_action()
            # Each recorded frame is paired with the action taken from it.
            states.append(_process_frame(state))
            actions.append(action)
            state, r, done, _ = env.step(action)

            if counter == REST_NUM:
                print('RUN:{},GT:{},DATA:{}'.format(run, game_time,
                                                    len(states)))
                position = np.random.randint(len(env.track))
                # Destroy the current car before replacing it so its bodies
                # do not accumulate in the physics world.
                env.car.destroy()
                env.car = Car(env.world, *env.track[position][1:4])
                counter = 0
            counter += 1

    states = np.array(states, dtype=np.uint8)
    actions = np.array(actions, dtype=np.float16)
    save_name = 'rollout_v2_{}.npz'.format(on)
    np.savez_compressed(dst + '/' + save_name, action=actions, state=states)
    # Release the viewer now that the rollout is finished.
    env.close()
def _randomize_car_pos(self):
    """Move the car to a random track tile and return the next observation.

    Returns:
        The observation from one ``self.step([0, 0, 0])`` taken after the
        car has been replaced.
    """
    # Read and write through the same object the track length comes from.
    # Assigning `car` on the outer `environment` would only set an attribute
    # there if it is a wrapper that forwards reads but not writes.
    # NOTE(review): assumes `environment.env` is the object whose `step`
    # uses `car` - confirm against how `environment` is constructed.
    inner = self.environment.env

    random_car_position = np.random.randint(len(inner.track))
    # Destroy the current car first so its bodies do not stay in the world.
    inner.car.destroy()
    inner.car = Car(inner.world, *inner.track[random_car_position][1:4])

    obs, _, _, _ = self.step([0, 0, 0])
    return obs
def reset(self):
    """Begin a new episode with a car using this environment's colours.

    Returns:
        The first observation (element 0 of ``step(None)``).
    """
    self._destroy()

    # Per-episode bookkeeping.
    self.reward = self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []

    # Track generation can fail; silently retry until it succeeds.
    while not self._create_track():
        pass

    start_pose = self.track[0][1:4]
    self.car = Car(
        self.world,
        *start_pose,
        WHEEL_COLOR=self.WHEEL_COLOR,
        WHEEL_WHITE=self.WHEEL_WHITE,
        MUD_COLOR=self.MUD_COLOR,
        HULL_COLOR=self.HULL_COLOR,
    )
    return self.step(None)[0]
def play(params):
    """Evaluate controller ``params`` over 16 random-start trials.

    Builds the VAE, RNN and controller models (loading checkpoints from
    ``checkpoints/`` when the files exist), then runs 16 trials of 999 steps
    each in a new ``CarRacing`` environment with gradients disabled. Each
    trial's reward is clipped from below at -100 before being accumulated.

    Args:
        params: Passed to ``controller.Controller``.

    Returns:
        The negated mean (clipped) reward per trial.
    """
    with torch.no_grad():
        block_print()
        device = torch.device("cpu")

        vae_model = vae.ConvVAE(VAE_Z_SIZE, VAE_KL_TOLERANCE)
        # NOTE(review): without a checkpoint file the model silently keeps
        # its initial weights - confirm this is intended.
        if os.path.exists("checkpoints/vae_checkpoint.pth"):
            vae_model.load_state_dict(
                torch.load("checkpoints/vae_checkpoint.pth",
                           map_location=device))
        vae_model = vae_model.eval()
        vae_model.to(device)

        rnn_model = rnn.MDMRNN(MDN_NUM_MIXTURES, MDN_HIDDEN_SIZE,
                               MDN_INPUT_SIZE, MDN_NUM_LAYERS, MDN_BATCH_SIZE,
                               1, MDN_OUTPUT_SIZE)
        if os.path.exists("checkpoints/rnn_checkpoint.pth"):
            rnn_model.load_state_dict(
                torch.load("checkpoints/rnn_checkpoint.pth",
                           map_location=device))
        rnn_model.to(device)
        rnn_model = rnn_model.eval()

        controller_model = controller.Controller(CMA_EMBEDDING_SIZE,
                                                 CMA_NUM_ACTIONS, params)

        env = CarRacing()
        _NUM_TRIALS = 16
        agent_reward = 0
        for trial in range(_NUM_TRIALS):
            observation = env.reset()

            # Little hack to make the Car start at random positions in the
            # race-track. The car created by reset() is destroyed first so
            # its bodies do not accumulate in the physics world.
            np.random.seed(int(str(time.time() * 1000000)[10:13]))
            position = np.random.randint(len(env.track))
            env.car.destroy()
            env.car = Car(env.world, *env.track[position][1:4])

            hidden_state, cell_state = train_rnn.init_hidden(
                MDN_NUM_LAYERS, MDN_BATCH_SIZE, MDN_HIDDEN_SIZE, device)

            total_reward = 0.0
            # NB: done is not True after 1000 steps when using the hack
            # above for random init of position, so the length is fixed.
            for _ in range(999):
                action, hidden_state, cell_state = decide_action(
                    vae_model, rnn_model, controller_model, observation,
                    hidden_state, cell_state, device)
                observation, r, done, info = env.step(action)
                total_reward += r

            # If reward is out of scale, clip it.
            total_reward = np.maximum(-100, total_reward)
            agent_reward += total_reward

        env.close()
        return -(agent_reward / _NUM_TRIALS)
def reset(self):
    """Begin a new multi-car episode and return the first observation.

    Prints the counters of the episode that just ended, clears the
    per-episode state, regenerates a track until ``_create_track`` succeeds
    and creates ``number_agent`` cars. The car with index 1 gets a fixed
    pose and is flagged as lead car; every other car is placed on track
    tile ``i * degree_d``.

    Returns:
        Element 0 of ``step(None)``.
    """
    # Report on the episode that just finished.
    print('the time played(total):')
    print('***********************count is ', self.count)
    print(self.time_count)
    print('long term reward for this episode is ', self.long_term_reward)
    self.long_term_reward = 0
    self.time_count += 1

    self._destroy()
    self.reward = self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []
    self.count = 0
    self.life_count = 0.0

    # Track generation can fail; retry until it reports success.
    while not self._create_track():
        if self.verbose == 1:
            print(
                "retry to generate track (normal if there are not many instances of this message)"
            )

    agent_cars = []
    for idx in range(number_agent):
        tile_index = idx * degree_d
        if idx == 1:
            # The second car gets a fixed pose and is marked as lead car.
            vehicle = Car(self.world, *(0, 225.0, initial_distance_apart))
            print('##################################################',
                  self.track[tile_index][1:4])
            vehicle.lead_car = True
            print('*************************************')
        else:
            vehicle = Car(self.world, *self.track[tile_index][1:4])
        agent_cars.append(vehicle)

    # Both attributes refer to the same list of cars.
    self.car = agent_cars
    self.car1 = agent_cars
    return self.step(None)[0]
def fast_reset(self):
    """Restart the episode on the existing track without regenerating it.

    Clears the lap/reward counters, marks every road tile as unvisited,
    restores ``road_poly`` from a deep copy of ``original_road_poly`` and
    replaces the car with a new one on the first track tile.

    Returns:
        The full result of ``step(None)`` (not just the observation).
    """
    self.car2 = None

    # Progress and reward bookkeeping.
    self.laps = 0
    self.on_road = True
    self.next_road_tile = 0
    self.reward = self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.human_render = False

    # Reuse the existing tiles: only their visited flag is cleared.
    for road_tile in self.road:
        road_tile.road_visited = False
    self.road_poly = copy.deepcopy(self.original_road_poly)

    # Swap the old car for a fresh one at the start of the track.
    self.car.destroy()
    start_pose = self.track[0][1:4]
    self.car = Car(self.world, *start_pose)

    return self.step(None)
def reset(self):
    """Begin a new episode on a freshly generated track.

    Returns:
        The first observation (element 0 of ``step(None)``).
    """
    self._destroy()
    self.reward = 0.0
    self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []

    # Track generation can fail; retry until it reports success.
    while not self._create_track():
        if self.verbose == 1:
            # The two literals are concatenated by the parser, so the first
            # one needs a trailing space (it used to print "manyinstances").
            print("retry to generate track (normal if there are not many "
                  "instances of this message)")

    self.car = Car(self.world, *self.track[0][1:4])
    return self.step(None)[0]
def reset(self):
    """Begin a new episode and return the first ``old_step`` observation.

    Returns:
        Element 0 of ``old_step((0, 0, 0))``.
    """
    self.num_step = 1
    self._destroy()

    # Per-episode bookkeeping.
    self.reward = self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []

    # Track generation can fail; silently retry until it succeeds.
    while not self._create_track():
        pass

    start_pose = self.track[0][1:4]
    self.car = Car(self.world, *start_pose)

    # Blank uint8 frame buffer of shape (STATE_W, STATE_H, 3).
    self.state_temp = np.zeros((STATE_W, STATE_H, 3), dtype=np.uint8)

    idle_action = (0, 0, 0)
    return self.old_step(idle_action)[0]
def reset(self):
    """Rebuild the ground, track tiles, cones and car for a new episode.

    Returns:
        The first observation (element 0 of ``step(None)``).
    """
    self._destroy()

    # Per-episode state.
    self.time = -1.0
    self.tile_visited_count = 0
    self.state = None
    self.done = False
    self.reward = self.prev_reward = 0.0

    # Build ground
    self.ground = Ground(self.world, PLAYFIELD, PLAYFIELD)

    # Build track tiles: each tile is built from a coordinate and the one
    # before it (index -1 wraps around for the first tile).
    coords = TrackCoordinatesBuilder.load_track(self)
    self.track_tiles_coordinates = coords
    self.track_tiles = [
        TrackTile(self.world, coords[idx], coords[idx - 1])
        for idx in range(len(coords))
    ]

    # Build cones: one cone per vertex of every tile's first fixture.
    cone_positions = [
        vertex
        for tile in self.track_tiles
        for vertex in tile.b2Data.fixtures[0].shape.vertices
    ]
    self.cones = [
        Cone(world=self.world, position=(vertex[0], vertex[1]))
        for vertex in cone_positions
    ]

    # Place the car on the first tile with zero heading.
    init_x, init_y = self.track_tiles[0].position
    self.car = Car(self.world, init_angle=0, init_x=init_x, init_y=init_y)

    return self.step(None)[0]
def reset(self):
    """Begin a new episode on a fresh track with a random direction.

    ``track_direction`` is drawn from ``{-1, 1}`` and, when a viewer
    exists, its ``geoms`` list is cleared before the track is generated.

    Returns:
        The first observation (element 0 of ``step(None)``).
    """
    self._destroy()

    # Per-episode bookkeeping.
    self.reward = self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []
    self.track_direction = random.choice([-1, 1])

    # Drop geometry left over in an existing viewer.
    if self.viewer:
        self.viewer.geoms = []

    # Track generation can fail; retry until it reports success.
    while not self._create_track():
        if self.verbose == 1:
            print(
                "retry to generate track (normal if there are not many of this messages)"
            )

    start_pose = self.track[0][1:4]
    self.car = Car(self.world, *start_pose, draw_car=True)
    return self.step(None)[0]
def main():
    """Generate cropped 64x64 observation datasets from random car poses.

    For each of 9 tracks (``obs_idx`` 1..9) the car is placed 1000 times at
    a random track tile with random integer offsets added to the first two
    pose values. One no-action step is taken and the cropped, normalised,
    resized and clipped frame is stored. Each track's frames are saved to
    ``data/observations_<obs_idx>.npy``.
    """
    print("Generating data for env CarRacing-v0")
    env = CarRacing()

    # Create the output directory once instead of checking per track.
    if not os.path.exists("data"):
        os.mkdir("data")

    for obs_idx in range(1, 10):
        env.reset()
        observations = []
        for i in range(1000):
            position = np.random.randint(len(env.track))
            angle = np.random.randint(-20, 20)
            x_off = np.random.randint(-20, 20)
            init_data = list(env.track[position][1:4])
            init_data[0] += angle
            init_data[1] += x_off

            # Destroy the previous car before creating the next one.
            # Otherwise 1000 cars per track pile up in the physics world.
            env.car.destroy()
            env.car = Car(env.world, *init_data)
            observation = env.step(None)[0]

            cropped_obs = normalize_observation(
                observation[:CROP_SIZE,
                            CROP_W_OFFSET:CROP_SIZE + CROP_W_OFFSET, :])
            cropped_obs = cv2.resize(
                cropped_obs, dsize=(64, 64),
                interpolation=cv2.INTER_CUBIC).astype(np.float32)
            # Clip in place to [0, 1] after the cubic resize.
            np.clip(cropped_obs, 0.0, 1.0, cropped_obs)

            if i % 10 == 0:
                print(i)
            if i % 100 == 0:
                # Preview every 100th frame.
                plt.imshow(cropped_obs)
                plt.show()
            observations.append(cropped_obs)

        observations = np.array(observations, dtype=np.float32)
        np.save("data/observations_%d.npy" % obs_idx, observations)
def reset(self):
    """Begin a new episode on a freshly generated track.

    Returns:
        The first observation (element 0 of ``step(None)``).
    """
    self._destroy()

    # Per-episode bookkeeping.
    self.reward = self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.road_poly = []

    # Track generation can fail; retry until it reports success.
    while not self._create_track():
        if self.verbose == 1:
            print("retry to generate track (normal if there are not many of this messages)")

    start_pose = self.track[0][1:4]
    self.car = Car(self.world, *start_pose)
    return self.step(None)[0]
def multiple_runs(on):
    """Record (state, action) pairs over several runs and save them.

    For each of ``MAX_RUNS`` runs the environment is reset and stepped
    ``MAX_GAME_TIME`` times with actions from ``generate_action``. Every
    time more than ``REST_NUM`` steps have passed since the last
    relocation, the car is moved to a random track tile. The list of
    ``{'state', 'action'}`` dicts is written to ``<dst>/rollout_<on>.npy``.

    Args:
        on: Identifier used in the output file name.
    """
    env = CarRacing()
    frame_and_action = []

    for run in range(MAX_RUNS):
        env.reset()
        counter = 0
        for game_time in range(MAX_GAME_TIME):
            action = generate_action()
            state, r, done, _ = env.step(action)
            frame_and_action.append({'state': state, 'action': action})

            counter += 1
            if counter > REST_NUM:
                print('RUN:{},GT:{},DATA:{}'.format(run, game_time,
                                                    len(frame_and_action)))
                position = np.random.randint(len(env.track))
                # Destroy the current car before replacing it so its bodies
                # do not accumulate in the physics world.
                env.car.destroy()
                env.car = Car(env.world, *env.track[position][1:4])
                counter = 0

    save_name = 'rollout_{}.npy'.format(on)
    np.save(dst + '/' + save_name, frame_and_action)
    # Release the viewer now that the rollout is finished.
    env.close()
def simulate_batch(batch_num, save=True, time_steps=None, reduce_size=True):
    """Collect normalised observations from random-start rollouts.

    Runs ``_BATCH_SIZE`` episodes. For each one the reset observation is
    recorded, the car is moved to a random track tile, and ``time_steps``
    further observations are recorded.

    Args:
        batch_num: Identifier used in the output file name and log message.
        save: When true, the observations are written to
            ``../data/obs_data_VAE_<batch_num:03d>`` with ``np.save``.
        time_steps: Steps per episode; defaults to ``_TIME_STEPS``.
        reduce_size: Forwarded to ``normalize_observation``.

    Returns:
        The list of normalised observations.
    """
    env = CarRacing()
    if time_steps is None:
        time_steps = _TIME_STEPS

    obs_data = []
    action = env.action_space.sample()

    for _ in range(_BATCH_SIZE):
        observation = env.reset()

        # Little hack to make the Car start at random positions in the
        # race-track. The car created by reset() is destroyed first so its
        # bodies do not accumulate in the physics world.
        position = np.random.randint(len(env.track))
        env.car.destroy()
        env.car = Car(env.world, *env.track[position][1:4])

        # The reset frame (taken before the car was moved) is recorded too.
        obs_data.append(
            normalize_observation(observation,
                                  output_4d=False,
                                  reduce_size=reduce_size))

        for _ in range(time_steps):
            if _RENDER:
                env.render()
            action = generate_action(action)
            observation, reward, done, info = env.step(action)
            obs_data.append(
                normalize_observation(observation,
                                      output_4d=False,
                                      reduce_size=reduce_size))

    if save:
        print("Saving dataset for batch {:03d}".format(batch_num))
        np.save('../data/obs_data_VAE_{:03d}'.format(batch_num), obs_data)

    env.close()
    return obs_data
class CarRacing(gym.Env):
    """Top-down car racing environment on a randomly generated closed track.

    Actions are ``(steer, gas, brake)`` in the box ``[-1, 0, 0]..[1, 1, 1]``.
    Observations are ``(STATE_H, STATE_W, 3)`` uint8 frames rendered in the
    ``state_pixels`` mode.
    """
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self):
        """Create the physics world and declare action/observation spaces."""
        self.seed()
        # Kept as an attribute so a reference to the listener stays alive.
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0

        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1, +1]),
                                       dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

    def seed(self, seed=None):
        """Seed ``self.np_random`` and return the seed in a one-item list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Destroy the road tile bodies and the car; no-op without a road."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and its road tiles.

        On success fills ``self.road``, ``self.road_poly`` and
        ``self.track`` (a list of ``(alpha, beta, x, y)`` tuples).

        Returns:
            True on success, False when no closed loop was found or the
            loop's head and tail do not meet closely enough.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            # The first and last checkpoints are fixed rather than random.
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        #print "\n".join(str(h) for h in checkpoints)
        #self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        # Iteration budget so the loop below always terminates.
        no_freeze = 2500
        visited_other_side = False
        while 1:
            alpha = math.atan2(y, x)
            # A lap is counted when alpha becomes positive again after
            # having been negative.
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, limited by TRACK_TURN_RATE.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break
        #print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        #print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            # A tile qualifies when the previous BORDER_MIN_COUNT segments
            # all turn in the same direction by more than the threshold.
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend each border backwards over the preceding tiles.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            # Index -1 wraps around, joining the first tile to the last.
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            t = self.world.CreateStaticBody(fixtures=fixtureDef(
                shape=polygonShape(
                    vertices=[road1_l, road1_r, road2_r, road2_l])))
            t.userData = t
            # Slight per-tile colour variation, cycling every three tiles.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                # Alternate white and red border segments.
                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l],
                                       (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Begin a new episode on a freshly generated track.

        Returns:
            The first observation (element 0 of ``step(None)``).
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        # Track generation can fail; retry until it succeeds.
        while True:
            success = self._create_track()
            if success:
                break
            #print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``(steer, gas, brake)`` or ``None`` to step without
                applying controls (as done from ``reset``).

        Returns:
            ``(state, step_reward, done, info)`` where ``info`` is an empty
            dict. ``done`` is set when every tile has been visited or the
            car has left the playfield (which also sets the step reward to
            -100).
        """
        # print(self.t * FPS)
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            # Constant per-step penalty.
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            #self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Draw the scene in the requested mode.

        Args:
            mode: ``'human'``, ``'rgb_array'`` or ``'state_pixels'``.

        Returns:
            For ``'rgb_array'`` and ``'state_pixels'`` an RGB uint8 array of
            the viewport; otherwise ``None``. Also ``None`` when ``reset``
            has not been called yet.
        """
        if self.viewer is None:
            # Imported lazily, only when a viewer is first needed.
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        zoom = ZOOM * SCALE
        zoom_state = ZOOM * SCALE * STATE_W / WINDOW_W
        zoom_video = ZOOM * SCALE * VIDEO_W / WINDOW_W
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Above a small speed the view follows the velocity direction
        # instead of the hull angle.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        if mode != 'state_pixels':
            win.switch_to()
            win.dispatch_events()
        if mode == "rgb_array" or mode == "state_pixels":
            win.clear()
            t = self.transform
            if mode == 'rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = STATE_W
                VP_H = STATE_H
            gl.glViewport(0, 0, VP_W, VP_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self.render_indicators(WINDOW_W,
                                   WINDOW_H)  # TODO: find why 2x needed
            image_data = pyglet.image.get_buffer_manager().get_color_buffer(
            ).get_image_data()
            # NOTE(review): binary-mode np.fromstring is deprecated in
            # NumPy in favour of np.frombuffer - confirm before upgrading.
            arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
            arr = arr.reshape(VP_H, VP_W, 4)
            # Flip the rows and keep the first three channels.
            arr = arr[::-1, :, 0:3]

        if mode == "rgb_array" and not self.human_render:  # agent can call or not call env.render() itself when recording video.
            win.flip()

        if mode == 'human':
            self.human_render = True
            win.clear()
            t = self.transform
            gl.glViewport(0, 0, WINDOW_W, WINDOW_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self.render_indicators(WINDOW_W, WINDOW_H)
            win.flip()

        self.viewer.onetime_geoms = []
        return arr

    def close(self):
        """Close the viewer, if one was created."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the background, the checker pattern and the road polygons."""
        gl.glBegin(gl.GL_QUADS)
        # Background quad covering the whole playfield.
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        # Lighter squares on every second grid cell.
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD / 20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        # Road tiles and borders collected by _create_track.
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the bottom status bar (speed, wheels, steering, score).

        Args:
            W: Width used to scale the indicator layout.
            H: Height used to scale the indicator layout.
        """
        gl.glBegin(gl.GL_QUADS)
        s = W / 40.0
        h = H / 40.0
        # Black backdrop across the bottom of the view.
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            # Vertical bar at column `place` whose height scales with `val`.
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            # Horizontal bar starting at column `place`, `val` columns wide.
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
class CarRacing(gym.Env, EzPickle):
    """Top-down car racing environment with pixel observations.

    A random closed track is generated on every ``reset()``.  The action is
    ``[steer, gas, brake]`` and the observation is the ``state_pixels``
    rendering, an RGB image of shape ``(STATE_H, STATE_W, 3)``.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS,
    }

    def __init__(self, verbose=1):
        """Create the physics world and declare the action/observation spaces.

        Args:
            verbose: when equal to 1, track-generation progress is printed.
        """
        # Record the constructor argument so that pickling / copying the
        # environment re-creates it with the same verbosity.
        EzPickle.__init__(self, verbose)
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose

        self.action_space = spaces.Box(
            np.array([-1, 0, 0]),
            np.array([+1, +1, +1]),
            dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)

    def seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Remove all road tiles and the car from the physics world."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Try to generate a random closed track.

        On success ``self.road``, ``self.road_poly`` and ``self.track`` are
        populated and True is returned.  False means the attempt failed and
        the caller should simply try again.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = (2 * math.pi * c / CHECKPOINTS
                     + self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS))
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so the loop cannot hang
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored,
        # second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track"
                  % (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2]))
            + np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                # Negative indices deliberately wrap around the closed loop.
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            _, beta1, x1, y1 = track[i]
            _, beta2, x2, y2 = track[i - 1]  # i == 0 pairs with the last point
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            t = self.world.CreateStaticBody(fixtures=fixtureDef(
                shape=polygonShape(
                    vertices=[road1_l, road1_r, road2_r, road2_l])))
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(
                ([road1_l, road1_r, road2_r, road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(
                    ([b1_l, b1_r, b2_r, b2_l],
                     (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Returns:
            The first observation, obtained from ``step(None)``.
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Track generation can fail; keep retrying until it succeeds.
        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``[steer, gas, brake]``, or None for the action-less
                step that ``reset()`` performs.

        Returns:
            ``(state, step_reward, done, info)`` where ``info`` is an empty
            dict.
        """
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be
            # faster.
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Draw the current frame.

        Args:
            mode: one of ``'human'``, ``'state_pixels'`` or ``'rgb_array'``.

        Returns:
            ``self.viewer.isopen`` in ``'human'`` mode; otherwise a writable
            uint8 array of shape ``(VP_H, VP_W, 3)``.  None is returned when
            ``reset()`` has not been called yet.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                '0000', font_size=36,
                x=20, y=WINDOW_H * 2.5 / 40.00,
                anchor_x='left', anchor_y='center',
                color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle)
                            - scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle)
                            + scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor()  # pylint: disable=protected-access
            # The scale factor may be a float; glViewport needs integers.
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
        # np.fromstring's binary mode is deprecated; frombuffer is the
        # documented replacement.
        arr = np.frombuffer(image_data.data, dtype=np.uint8)
        arr = arr.reshape(VP_H, VP_W, 4)
        # Flip rows, drop the 4th channel, and copy so the result is
        # writable and independent of the image buffer.
        arr = arr[::-1, :, 0:3].copy()

        return arr

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the grass background, the grass checker pattern and all
        polygons in ``self.road_poly`` as OpenGL quads."""
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD / 20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the bottom indicator bar and the score label.

        Args:
            W: width used to scale the indicator layout.
            H: height used to scale the indicator layout.
        """
        gl.glBegin(gl.GL_QUADS)
        s = W / 40.0
        h = H / 40.0
        # Black background strip behind the indicators.
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(np.square(self.car.hull.linearVelocity[0])
                             + np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega, (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
class CarRacingSoft(gym.Env, EzPickle):
    """Car racing environment drawn by a software rasterizer.

    Frames are drawn directly into ``self.state`` (a float32 array of shape
    ``(STATE_H, STATE_W, 3)``) with a scanline polygon fill instead of
    OpenGL.  Each call to ``step()`` repeats the action for up to
    ``frame_skip`` physics frames.
    """

    metadata = {'render.modes': ['human'], 'video.frames_per_second': FPS}

    color_black = np.array([0., 0., 0.])
    color_white = np.array([1., 1., 1.])
    color_red = np.array([1., 0., 0.])
    color_green = np.array([0., 1., 0.])
    color_grass_dark = np.array([0.4, 0.8, 0.4])
    color_grass_light = np.array([0.4, 0.9, 0.4])
    color_abs_light = np.array([0., 0., 1.])
    color_abs_dark = np.array([0.2, 0., 1.])

    def __init__(self, frame_skip, verbose=False):
        """Create the physics world, the spaces and the frame buffer.

        Args:
            frame_skip: number of physics frames simulated per ``step()``
                call; must be at least 1.
            verbose: when truthy, track-generation progress and a per-step
                indicator readout are printed.

        Raises:
            ValueError: if ``frame_skip`` is smaller than 1.
        """
        # Record the constructor arguments: ``frame_skip`` has no default,
        # so without them the environment could not be re-created when it is
        # unpickled or copied.
        EzPickle.__init__(self, frame_skip, verbose)
        if frame_skip < 1:
            raise ValueError("The value of frame_skip must be at least 1")
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        # Reusable fixture definition; its vertices are overwritten per tile.
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(
            np.array([-1, 0, 0], dtype=np.float32),
            np.array([+1, +1, +1], dtype=np.float32),
            dtype=np.float32)  # steer, gas, brake
        # NOTE(review): the drawing code fills self.state with colour values
        # in 0..1 (render() multiplies by 255), while the declared bounds are
        # 0..255 -- confirm which range consumers expect before changing it.
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.float32)
        self.state = np.zeros([STATE_H, STATE_W, 3], dtype=np.float32)
        self.frame_skip = frame_skip

    def seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Remove all road tiles and the car from the physics world."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Try to generate a random closed track.

        On success ``self.road``, ``self.road_poly`` and ``self.track`` are
        populated and True is returned.  False means the attempt failed and
        the caller should simply try again.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = (2 * math.pi * c / CHECKPOINTS
                     + self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS))
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so the loop cannot hang
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored,
        # second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose:
            print("Track generation: %i..%i -> %i-tiles track"
                  % (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2]))
            + np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                # Negative indices deliberately wrap around the closed loop.
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            _, beta1, x1, y1 = track[i]
            _, beta2, x2, y2 = track[i - 1]  # i == 0 pairs with the last point
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(
                ([road1_l, road1_r, road2_r, road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(
                    ([b1_l, b1_r, b2_r, b2_l],
                     (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Returns:
            The first observation, obtained from ``step(None)``.
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False
        self.frames = 0

        # Track generation can fail; keep retrying until it succeeds.
        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose:
                print(
                    "retry to generate track (normal if there are not many instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Simulate up to ``frame_skip`` frames with the same action.

        Args:
            action: ``[steer, gas, brake]``, or None for the single
                action-less frame that ``reset()`` performs.

        Returns:
            ``(state, total_reward, done, info)`` where ``state`` is a copy
            of the frame buffer, ``total_reward`` is the sum of the per-frame
            rewards and ``info`` is an empty dict.
        """
        total_reward = 0
        for _ in range(self.frame_skip):
            if action is not None:
                self.car.steer(-action[0])
                self.car.gas(action[1])
                self.car.brake(action[2])

            self.car.step(1.0 / FPS)
            self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
            self.t += 1.0 / FPS

            step_reward = 0
            done = False
            if action is not None:  # First step without action, called from reset()
                self.reward -= 0.1
                # We actually don't want to count fuel spent, we want car to
                # be faster.
                self.car.fuel_spent = 0.0
                step_reward = self.reward - self.prev_reward
                self.prev_reward = self.reward
                if self.tile_visited_count == len(self.track):
                    done = True
                x, y = self.car.hull.position
                if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                    done = True
                    step_reward = -100

            total_reward += step_reward

            self.frames += 1
            if self.frames > 1000:
                done = True

            if done or action is None:
                break

        self._draw()

        # Diagnostic readout; printed only on request instead of on every
        # step.
        if self.verbose:
            self._print_indicator_readout()

        return np.copy(self.state), total_reward, done, {}

    def _print_indicator_readout(self):
        """Print values read back from fixed pixel regions of ``self.state``."""
        speed = sum(self.state[85:, 2, 0])
        abs1 = sum(self.state[85:, 9, 2])
        abs2 = sum(self.state[85:, 14, 2])
        abs3 = sum(self.state[85:, 19, 2])
        abs4 = sum(self.state[85:, 24, 2])
        steering_input_left = sum(self.state[90, 37:48, 1])
        steering_input_right = sum(self.state[90, 47:58, 1])
        steering = steering_input_right - steering_input_left
        rotation_left = sum(self.state[90, 59:72, 0])
        rotation_right = sum(self.state[90, 72:85, 0])
        rotation = rotation_right - rotation_left
        print(
            f"speed:{speed}\tabs:\t{abs1}\t{abs2}\t{abs3}\t{abs4}\tsteering:{steering}\trotation:{rotation}"
        )

    def render(self, mode='human', close=False):
        """Show the current frame buffer in an image viewer.

        Args:
            mode: accepted for API compatibility; not used by this method.
            close: when truthy, close the viewer (if any) and return.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.SimpleImageViewer()
        upscaled = self.state.repeat(RENDER_UPSCALE, axis=0).repeat(
            RENDER_UPSCALE, axis=1)
        self.viewer.imshow((upscaled * 255).astype(np.uint8))

    def _draw(self):
        """Redraw road, car and indicators into ``self.state``."""

        # Simple 2D affine transformation class
        class Transform():
            def __init__(self, *values):
                # Row-major 3x3 matrix; identity when no values are given.
                self.matrix = values if len(values) else [
                    1., 0., 0., 0., 1., 0., 0., 0., 1.
                ]

            @staticmethod
            def translation(x, y):
                return Transform(1.0, 0.0, x, 0.0, 1.0, y, 0.0, 0.0, 1.0)

            @staticmethod
            def scale(x, y):
                return Transform(x, 0.0, 0.0, 0.0, y, 0.0, 0.0, 0.0, 1.0)

            @staticmethod
            def rotation(angle):
                cos, sin = math.cos(angle), math.sin(angle)
                return Transform(cos, -sin, 0.0, sin, cos, 0.0, 0.0, 0.0, 1.0)

            def apply_and_swap(self, point):
                # Transform (x, y) and return it as (y', x').
                sa, sb, sc, sd, se, sf, _, _, _ = self.matrix
                x, y = point
                return (x * sd + y * se + sf, x * sa + y * sb + sc)

            def __mul__(self, other):
                sa, sb, sc, sd, se, sf, _, _, _ = self.matrix
                oa, ob, oc, od, oe, of, _, _, _ = other.matrix
                return Transform(sa * oa + sb * od, sa * ob + sb * oe,
                                 sa * oc + sb * of + sc,
                                 sd * oa + se * od, sd * ob + se * oe,
                                 sd * oc + se * of + sf,
                                 0.0, 0.0, 1.0)

            def __imul__(self, other):
                return self.__mul__(other)

        # Adapter handed to Car.draw(); forwards polygons to _fill_polygon.
        class Renderer():
            def __init__(self, env):
                self.env = env

            def draw_polygon(self, path, color):
                self.env._fill_polygon(path, self.env.state, color)

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform = Transform.translation(STATE_W / 2, STATE_H * 3 / 4)
        self.transform *= Transform.scale(STATE_W / 1000, STATE_H / 1000)
        self.transform *= Transform.scale(zoom, -zoom)
        self.transform *= Transform.rotation(angle)
        self.transform *= Transform.translation(-scroll_x, -scroll_y)

        # Clear
        self.state[:, :, :] = self.color_black

        # Draw road, car and indicators
        self._render_road(scroll_x, scroll_y, zoom)
        self.car.draw(Renderer(self), False)
        self._render_indicators()

    def _render_road(self, scroll_x, scroll_y, zoom):
        """Draw grass and road polygons, skipping those far from the car.

        Args:
            scroll_x: x position the view is centred on.
            scroll_y: y position the view is centred on.
            zoom: current zoom factor; sets the squared culling distance.
        """
        self._fill_polygon([(-PLAYFIELD, +PLAYFIELD), (+PLAYFIELD, +PLAYFIELD),
                            (+PLAYFIELD, -PLAYFIELD), (-PLAYFIELD, -PLAYFIELD)],
                           self.state, self.color_grass_dark)
        k = PLAYFIELD / 20.0
        # Squared distance beyond which polygons are not drawn.
        mindist = 2000000 / (zoom**2)
        for x in range(-20, 20, 2):
            kx = k * x
            dist = (kx - scroll_x)**2
            if dist >= mindist:
                continue
            for y in range(-20, 20, 2):
                ky = k * y
                if dist + (ky - scroll_y)**2 >= mindist:
                    continue
                self._fill_polygon([(kx + k, ky + 0), (kx + 0, ky + 0),
                                    (kx + 0, ky + k), (kx + k, ky + k)],
                                   self.state, self.color_grass_light)
        for poly, color in self.road_poly:
            if ((poly[0][0] - scroll_x)**2
                    + (poly[0][1] - scroll_y)**2) >= mindist:
                continue
            self._fill_polygon(poly, self.state, color)

    def _render_indicators(self):
        """Draw the indicator bar at the bottom of ``self.state``."""
        s = STATE_W / 40
        h = STATE_H / 40
        # Black background strip behind the indicators.
        self._fill_polygon([(0, STATE_H), (STATE_W, STATE_H),
                            (STATE_W, STATE_H - 5 * h),
                            (0, STATE_H - 5 * h)],
                           self.state, self.color_black, transform=False)

        def vertical_ind(place, val, color):
            self._fill_polygon([((place + 0) * s, STATE_H - h - h * val),
                                ((place + 2) * s, STATE_H - h - h * val),
                                ((place + 2) * s, STATE_H - h),
                                ((place + 0) * s, STATE_H - h)],
                               self.state, color, transform=False)

        def horiz_ind(place, val, color):
            self._fill_polygon([((place + 0) * s, STATE_H - 4 * h),
                                ((place + val) * s, STATE_H - 4 * h),
                                ((place + val) * s, STATE_H - 1.5 * h),
                                ((place + 0) * s, STATE_H - 1.5 * h)],
                               self.state, color, transform=False)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0])
            + np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(1, 0.02 * true_speed, self.color_white)
        vertical_ind(4, 0.01 * self.car.wheels[0].omega,
                     self.color_abs_light)  # ABS sensors
        vertical_ind(6, 0.01 * self.car.wheels[1].omega, self.color_abs_light)
        vertical_ind(8, 0.01 * self.car.wheels[2].omega, self.color_abs_dark)
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, self.color_abs_dark)
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, self.color_green)
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, self.color_red)

    # Adapted from https://github.com/luispedro/mahotas/blob/master/mahotas/polygon.py
    def _fill_polygon(self, polygon, canvas, color, transform=True):
        """Draw a filled polygon into ``canvas`` in place.

        Args:
            polygon: list of ``(x, y)`` points.  With ``transform=True`` they
                are mapped through ``self.transform``; otherwise they are
                used directly as canvas coordinates (x = column, y = row).
            canvas: array to draw into, indexed ``[row, column]``.
            color: value assigned to every covered pixel.
            transform: whether to apply ``self.transform`` to the points.
        """
        # algorithm adapted from: http://www.alienryderflex.com/polygon_fill/
        if not len(polygon):
            return

        # From here on points are (row, column) pairs.
        if transform:
            polygon = [
                self.transform.apply_and_swap(point) for point in polygon
            ]
        else:
            polygon = [(float(y), float(x)) for x, y in polygon]

        # Clip the row range to the canvas and bail out early when the
        # polygon lies completely outside of it.
        min_y = max(int(min(y for y, x in polygon)), 0)
        if min_y >= canvas.shape[0]:
            return
        max_y = min(max(int(max(y + 1 for y, x in polygon)), 0),
                    canvas.shape[0])
        if max_y <= 0:
            return
        if min(x for y, x in polygon) >= canvas.shape[1]:
            return
        if max(x for y, x in polygon) < 0:
            return

        for y in range(min_y, max_y):
            # Columns at which this row crosses a polygon edge.
            nodes = []
            j = -1
            for i, p in enumerate(polygon):
                pj = polygon[j]
                if p[0] < y and pj[0] >= y or pj[0] < y and p[0] >= y:
                    dy = pj[0] - p[0]
                    if dy:
                        nodes.append(p[1] + (y - p[0]) / dy * (pj[1] - p[1]))
                    elif p[0] == y:
                        nodes.append(p[1])
                j = i
            nodes.sort()
            # Fill between consecutive pairs of crossings.
            for n, nn in zip(nodes[::2], nodes[1::2]):
                canvas[y, max(int(n), 0):min(max(int(nn), 0),
                                             canvas.shape[1])] = color
class CarRacing(gym.Env, EzPickle): metadata = { 'render.modes': ['human', 'rgb_array', 'state_pixels'], 'video.frames_per_second': FPS } def __init__(self, verbose=1): EzPickle.__init__(self) self.seed() self.contactListener_keepref = FrictionDetector(self) self.world = Box2D.b2World( (0, 0), contactListener=self.contactListener_keepref) self.viewer = None self.invisible_state_window = None self.invisible_video_window = None self.road = None self.car = None self.reward = 0.0 self.prev_reward = 0.0 self.verbose = verbose self.fd_tile = fixtureDef(shape=polygonShape( vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])) self.action_space = spaces.Box(np.array([-1, 0, 0]), np.array([+1, +1, +1]), dtype=np.float32) # steer, gas, brake self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8) CHECKPOINTS = 12 # Create checkpoints # TODO Use a real way to keep a constant track across training runs self.checkpoints = [] self.checkpoints.append((0, 225.0, 0.0)) self.checkpoints.append( (0.7825323624208509, 89.6427647192468, 89.1304349066121)) self.checkpoints.append( (1.5543323243350344, 1.783985395462951, 108.34693482727236)) self.checkpoints.append( (1.6057305460922464, -2.2173644459530517, 63.446740574663174)) self.checkpoints.append( (2.6175081644916047, -58.76396976672586, 33.965461046114534)) self.checkpoints.append( (2.7871461118931458, -134.63944761816262, 49.82679389320398)) self.checkpoints.append( (3.414113547480756, -106.41825645850612, -29.741137759708423)) self.checkpoints.append( (3.8745797378794, -77.61403468584427, -69.87679530100709)) self.checkpoints.append( (4.193711736042842, -33.56139367373087, -58.79641863577176)) self.checkpoints.append( (4.928823629511352, 29.852520836123745, -135.76810358020867)) self.checkpoints.append( (5.29734709463665, 68.99766439052978, -104.18233435806235)) self.checkpoints.append( (5.759586531581287, 194.85571585149864, -112.5000000000001)) self.start_alpha = 2 * math.pi * (-0.5) / 
CHECKPOINTS ''' self.checkpoints = [] for c in range(CHECKPOINTS): alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS) rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD) if c==0: alpha = 0 rad = 1.5*TRACK_RAD if c==CHECKPOINTS-1: alpha = 2*math.pi*c/CHECKPOINTS rad = 1.5*TRACK_RAD self.checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) ) ''' def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _destroy(self): if not self.road: return for t in self.road: self.world.DestroyBody(t) self.road = [] self.car.destroy() def _create_track(self): checkpoints = self.checkpoints #print ("\n".join(str(h) for h in checkpoints)) # self.road_poly = [ ( # uncomment this to see checkpoints # [ (tx,ty) for a,tx,ty in checkpoints ], # (0.7,0.7,0.9) ) ] self.road = [] # Go from one checkpoint to another to create track x, y, beta = 1.5 * TRACK_RAD, 0, 0 dest_i = 0 laps = 0 track = [] no_freeze = 2500 visited_other_side = False while True: alpha = math.atan2(y, x) if visited_other_side and alpha > 0: laps += 1 visited_other_side = False if alpha < 0: visited_other_side = True alpha += 2 * math.pi while True: # Find destination from checkpoints failed = True while True: dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)] if alpha <= dest_alpha: failed = False break dest_i += 1 if dest_i % len(checkpoints) == 0: break if not failed: break alpha -= 2 * math.pi continue r1x = math.cos(beta) r1y = math.sin(beta) p1x = -r1y p1y = r1x dest_dx = dest_x - x # vector towards destination dest_dy = dest_y - y proj = r1x * dest_dx + r1y * dest_dy # destination vector projected on rad while beta - alpha > 1.5 * math.pi: beta -= 2 * math.pi while beta - alpha < -1.5 * math.pi: beta += 2 * math.pi prev_beta = beta proj *= SCALE if proj > 0.3: beta -= min(TRACK_TURN_RATE, abs(0.001 * proj)) if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001 * proj)) x += p1x * TRACK_DETAIL_STEP y += p1y * 
TRACK_DETAIL_STEP track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y)) if laps > 4: break no_freeze -= 1 if no_freeze == 0: break # print "\n".join([str(t) for t in enumerate(track)]) # Find closed loop range i1..i2, first loop should be ignored, second is OK i1, i2 = -1, -1 i = len(track) while True: i -= 1 if i == 0: return False # Failed pass_through_start = track[i][0] > self.start_alpha and track[ i - 1][0] <= self.start_alpha if pass_through_start and i2 == -1: i2 = i elif pass_through_start and i1 == -1: i1 = i break if self.verbose == 1: print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1)) assert i1 != -1 assert i2 != -1 track = track[i1:i2 - 1] first_beta = track[0][1] first_perp_x = math.cos(first_beta) first_perp_y = math.sin(first_beta) # Length of perpendicular jump to put together head and tail well_glued_together = np.sqrt( np.square(first_perp_x * (track[0][2] - track[-1][2])) + np.square(first_perp_y * (track[0][3] - track[-1][3]))) if well_glued_together > TRACK_DETAIL_STEP: return False # Red-white border on hard turns ''' border = [False]*len(track) for i in range(len(track)): good = True oneside = 0 for neg in range(BORDER_MIN_COUNT): beta1 = track[i-neg-0][1] beta2 = track[i-neg-1][1] good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2 oneside += np.sign(beta1 - beta2) good &= abs(oneside) == BORDER_MIN_COUNT border[i] = good for i in range(len(track)): for neg in range(BORDER_MIN_COUNT): border[i-neg] |= border[i] ''' # Create tiles for i in range(len(track)): alpha1, beta1, x1, y1 = track[i] alpha2, beta2, x2, y2 = track[i - 1] road1_l = (x1 - TRACK_WIDTH * math.cos(beta1), y1 - TRACK_WIDTH * math.sin(beta1)) road1_r = (x1 + TRACK_WIDTH * math.cos(beta1), y1 + TRACK_WIDTH * math.sin(beta1)) road2_l = (x2 - TRACK_WIDTH * math.cos(beta2), y2 - TRACK_WIDTH * math.sin(beta2)) road2_r = (x2 + TRACK_WIDTH * math.cos(beta2), y2 + TRACK_WIDTH * math.sin(beta2)) vertices = [road1_l, road1_r, road2_r, road2_l] 
self.fd_tile.shape.vertices = vertices t = self.world.CreateStaticBody(fixtures=self.fd_tile) t.userData = t c = 0.01 * (i % 3) t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c] t.road_visited = False t.road_friction = 1.0 t.fixtures[0].sensor = True self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color)) self.road.append(t) ''' if border[i]: side = np.sign(beta2 - beta1) b1_l = (x1 + side* TRACK_WIDTH *math.cos(beta1), y1 + side* TRACK_WIDTH *math.sin(beta1)) b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1)) b2_l = (x2 + side* TRACK_WIDTH *math.cos(beta2), y2 + side* TRACK_WIDTH *math.sin(beta2)) b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2)) self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) )) ''' self.track = track return True def reset(self): self._destroy() self.reward = 0.0 self.prev_reward = 0.0 self.tile_visited_count = 0 self.t = 0.0 self.road_poly = [] while True: success = self._create_track() if success: break if self.verbose == 1: print( "retry to generate track (normal if there are not many of this messages)" ) self.car = Car(self.world, *self.track[0][1:4]) return self.step(None)[0] def step(self, action): if action is not None: self.car.steer(-action[0]) self.car.gas(action[1]) self.car.brake(action[2]) self.car.step(1.0 / FPS) self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) self.t += 1.0 / FPS self.state = self.render("state_pixels") step_reward = 0 done = False if action is not None: # First step without action, called from reset() self.reward -= 0.1 # We actually don't want to count fuel spent, we want car to be faster. 
# self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER self.car.fuel_spent = 0.0 step_reward = self.reward - self.prev_reward self.prev_reward = self.reward if self.tile_visited_count == len(self.track): done = True x, y = self.car.hull.position if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD: done = True step_reward = -100 # TODO: Exit if far off track return self.state, step_reward, done, {} def render(self, mode='human'): assert mode in ['human', 'state_pixels', 'rgb_array'] if self.viewer is None: from gym.envs.classic_control import rendering self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H) self.score_label = pyglet.text.Label('0000', font_size=36, x=20, y=WINDOW_H * 2.5 / 40.00, anchor_x='left', anchor_y='center', color=(255, 255, 255, 255)) self.transform = rendering.Transform() if "t" not in self.__dict__: return # reset() not called yet zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min( self.t, 1) # Animate zoom first second zoom_state = ZOOM * SCALE * STATE_W / WINDOW_W zoom_video = ZOOM * SCALE * VIDEO_W / WINDOW_W scroll_x = self.car.hull.position[0] scroll_y = self.car.hull.position[1] angle = -self.car.hull.angle vel = self.car.hull.linearVelocity #if np.linalg.norm(vel) > 0.5: # angle = math.atan2(vel[0], vel[1]) self.transform.set_scale(zoom, zoom) self.transform.set_translation( WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) - scroll_y * zoom * math.sin(angle)), WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) + scroll_y * zoom * math.cos(angle))) self.transform.set_rotation(angle) self.car.draw(self.viewer, mode != "state_pixels") arr = None win = self.viewer.window win.switch_to() win.dispatch_events() win.clear() t = self.transform if mode == 'rgb_array': VP_W = VIDEO_W VP_H = VIDEO_H elif mode == 'state_pixels': VP_W = STATE_W VP_H = STATE_H else: pixel_scale = 1 if hasattr(win.context, '_nscontext'): pixel_scale = win.context._nscontext.view().backingScaleFactor( ) # pylint: disable=protected-access VP_W = int(pixel_scale * 
WINDOW_W) VP_H = int(pixel_scale * WINDOW_H) gl.glViewport(0, 0, VP_W, VP_H) t.enable() self.render_road() for geom in self.viewer.onetime_geoms: geom.render() self.viewer.onetime_geoms = [] t.disable() # self.render_indicators(WINDOW_W, WINDOW_H) if mode == 'human': win.flip() return self.viewer.isopen image_data = pyglet.image.get_buffer_manager().get_color_buffer( ).get_image_data() arr = np.fromstring(image_data.data, dtype=np.uint8, sep='') arr = arr.reshape(VP_H, VP_W, 4) arr = arr[::-1, :, 0:3] return arr def close(self): if self.viewer is not None: self.viewer.close() self.viewer = None def render_road(self): gl.glBegin(gl.GL_QUADS) gl.glColor4f(0.4, 0.8, 0.4, 1.0) gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0) gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0) gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0) gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0) gl.glColor4f(0.4, 0.9, 0.4, 1.0) k = PLAYFIELD / 20.0 #for x in range(-20, 20, 2): # for y in range(-20, 20, 2): # gl.glVertex3f(k*x + k, k*y + 0, 0) # gl.glVertex3f(k*x + 0, k*y + 0, 0) # gl.glVertex3f(k*x + 0, k*y + k, 0) # gl.glVertex3f(k*x + k, k*y + k, 0) for poly, color in self.road_poly: gl.glColor4f(color[0], color[1], color[2], 1) for p in poly: gl.glVertex3f(p[0], p[1], 0) gl.glEnd() def render_indicators(self, W, H): gl.glBegin(gl.GL_QUADS) s = W / 40.0 h = H / 40.0 gl.glColor4f(0, 0, 0, 1) gl.glVertex3f(W, 0, 0) gl.glVertex3f(W, 5 * h, 0) gl.glVertex3f(0, 5 * h, 0) gl.glVertex3f(0, 0, 0) def vertical_ind(place, val, color): gl.glColor4f(color[0], color[1], color[2], 1) gl.glVertex3f((place + 0) * s, h + h * val, 0) gl.glVertex3f((place + 1) * s, h + h * val, 0) gl.glVertex3f((place + 1) * s, h, 0) gl.glVertex3f((place + 0) * s, h, 0) def horiz_ind(place, val, color): gl.glColor4f(color[0], color[1], color[2], 1) gl.glVertex3f((place + 0) * s, 4 * h, 0) gl.glVertex3f((place + val) * s, 4 * h, 0) gl.glVertex3f((place + val) * s, 2 * h, 0) gl.glVertex3f((place + 0) * s, 2 * h, 0) true_speed = np.sqrt( 
np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1])) vertical_ind(5, 0.02 * true_speed, (1, 1, 1)) vertical_ind(7, 0.01 * self.car.wheels[0].omega, (0.0, 0, 1)) # ABS sensors vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1)) vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1)) vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1)) horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0)) horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0)) gl.glEnd() self.score_label.text = "%04i" % self.reward self.score_label.draw()
class CarRacing(gym.Env, EzPickle):
    """Top-down car racing environment on a randomly generated closed track.

    Actions are a 3-vector ``(steer, gas, brake)`` bounded by
    ``action_space``.  Observations are the RGB frames produced by
    ``render("state_pixels")`` and have the shape declared in
    ``observation_space`` (``STATE_H x STATE_W x 3``, ``uint8``).
    """

    metadata = {
        "render.modes": ["human", "rgb_array", "state_pixels"],
        "video.frames_per_second": FPS,
    }

    def __init__(self, verbose=1):
        """Create the Box2D world and declare action/observation spaces.

        Args:
            verbose: When equal to 1, track generation prints progress and
                retry messages.
        """
        EzPickle.__init__(self)
        self.seed()
        # Keep a reference on the instance so the listener object stays
        # alive for as long as the world that uses it.
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        # Reusable fixture definition; its vertices are overwritten for
        # every road tile created in _create_track().
        self.fd_tile = fixtureDef(
            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
        )

        self.action_space = spaces.Box(
            np.array([-1, 0, 0]).astype(np.float32),
            np.array([+1, +1, +1]).astype(np.float32),
        )  # steer, gas, brake

        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
        )

    def seed(self, seed=None):
        """Re-seed the environment's random generator.

        Args:
            seed: Seed forwarded to ``seeding.np_random``; ``None`` lets the
                helper pick one.

        Returns:
            A one-element list containing the seed that was actually used.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Destroy all road tile bodies and the car; no-op if no road exists."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and create one sensor body per tile.

        On success the track points are stored in ``self.track``, the tile
        bodies in ``self.road`` and the drawable quads are appended to
        ``self.road_poly`` (which the caller must have initialised).

        Returns:
            True if a well-closed loop was produced, False if generation
            failed and should be retried.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            # First and last checkpoints are pinned (no noise, fixed radius)
            # so that the start/finish area is always in the same place.
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so the loop cannot hang
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            # Keep beta within 1.5*pi of alpha so the two angles stay comparable.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (
                track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha
            )
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1 : i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2]))
            + np.square(first_perp_y * (track[0][3] - track[-1][3]))
        )
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                # Negative indices wrap around, which is intended: the
                # track is a closed loop.
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            # track[i - 1] wraps to the last point for i == 0, closing the loop.
            _, beta1, x1, y1 = track[i]
            _, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)  # slight shade variation between neighbouring tiles
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(
                    ([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0))
                )
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Returns:
            The first observation, obtained from ``step(None)``.
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Track generation can fail; keep trying until it succeeds.
        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many "
                    "instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by one frame (``1 / FPS`` seconds).

        Args:
            action: ``(steer, gas, brake)`` or ``None``.  ``None`` advances
                physics without applying controls or touching the reward;
                ``reset()`` uses it to produce the first observation.

        Returns:
            Tuple ``(state, step_reward, done, info)``: ``state`` is the
            ``"state_pixels"`` frame, ``step_reward`` is the change in
            ``self.reward`` since the previous step (or -100 when the car
            leaves the playfield), ``done`` is True when every tile has been
            visited or the car left the playfield, and ``info`` is an empty
            dict.
        """
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode="human"):
        """Draw the current frame.

        Args:
            mode: One of ``"human"``, ``"state_pixels"`` or ``"rgb_array"``.

        Returns:
            ``None`` if ``reset()`` has not been called yet; for ``"human"``
            the viewer's ``isopen`` flag; otherwise a writable ``uint8``
            array of shape ``(height, width, 3)`` holding the RGB frame with
            the row order reversed relative to the raw buffer.
        """
        assert mode in ["human", "state_pixels", "rgb_array"]
        if self.viewer is None:
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                "0000",
                font_size=36,
                x=20,
                y=WINDOW_H * 2.5 / 40.00,
                anchor_x="left",
                anchor_y="center",
                color=(255, 255, 255, 255),
            )
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second:
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car is moving, orient the camera along its velocity
        # instead of along the hull.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2
            - (scroll_x * zoom * math.cos(angle) - scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4
            - (scroll_x * zoom * math.sin(angle) + scroll_y * zoom * math.cos(angle)),
        )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == "rgb_array":
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == "state_pixels":
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, "_nscontext"):
                pixel_scale = (
                    win.context._nscontext.view().backingScaleFactor()
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == "human":
            win.flip()
            return self.viewer.isopen

        image_data = (
            pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
        )
        # np.fromstring's binary mode is deprecated; frombuffer is the
        # supported replacement.  The copy keeps the returned frame writable
        # and independent of pyglet's buffer, as it was with fromstring.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8).copy()
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]  # reverse the row order and drop alpha

        return arr

    def close(self):
        """Close the viewer window if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the background, the checker pattern and all road polygons."""
        colors = [0.4, 0.8, 0.4, 1.0] * 4
        polygons_ = [
            +PLAYFIELD,
            +PLAYFIELD,
            0,
            +PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            +PLAYFIELD,
            0,
        ]

        k = PLAYFIELD / 20.0
        # 20 x 20 checker squares, 4 vertices each, all the same colour.
        colors.extend([0.4, 0.9, 0.4, 1.0] * 4 * 20 * 20)
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                polygons_.extend(
                    [
                        k * x + k,
                        k * y + 0,
                        0,
                        k * x + 0,
                        k * y + 0,
                        0,
                        k * x + 0,
                        k * y + k,
                        0,
                        k * x + k,
                        k * y + k,
                        0,
                    ]
                )

        for poly, color in self.road_poly:
            colors.extend([color[0], color[1], color[2], 1] * len(poly))
            for p in poly:
                polygons_.extend([p[0], p[1], 0])

        vl = pyglet.graphics.vertex_list(
            len(polygons_) // 3, ("v3f", polygons_), ("c4f", colors)
        )  # gl.GL_QUADS,
        vl.draw(gl.GL_QUADS)
        vl.delete()

    def render_indicators(self, W, H):
        """Draw the bottom HUD bars and the score label.

        Args:
            W: Width used to scale the HUD horizontally.
            H: Height used to scale the HUD vertically.
        """
        s = W / 40.0
        h = H / 40.0
        colors = [0, 0, 0, 1] * 4
        polygons = [W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0]

        def vertical_ind(place, val, color):
            # Vertical bar one slot wide whose height is proportional to val.
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend(
                [
                    place * s,
                    h + h * val,
                    0,
                    (place + 1) * s,
                    h + h * val,
                    0,
                    (place + 1) * s,
                    h,
                    0,
                    (place + 0) * s,
                    h,
                    0,
                ]
            )

        def horiz_ind(place, val, color):
            # Horizontal bar starting at slot `place`, extending val slots.
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend(
                [
                    (place + 0) * s,
                    4 * h,
                    0,
                    (place + val) * s,
                    4 * h,
                    0,
                    (place + val) * s,
                    2 * h,
                    0,
                    (place + 0) * s,
                    2 * h,
                    0,
                ]
            )

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0])
            + np.square(self.car.hull.linearVelocity[1])
        )

        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega, (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        vl = pyglet.graphics.vertex_list(
            len(polygons) // 3, ("v3f", polygons), ("c4f", colors)
        )  # gl.GL_QUADS,
        vl.draw(gl.GL_QUADS)
        vl.delete()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
class CarRacing(gym.Env, EzPickle):
    """Car racing environment variant that exposes an 11-element state vector.

    Actions are a 3-vector ``(steer, gas, brake)`` bounded by
    ``action_space``.  ``step()`` and ``reset()`` return only the state
    vector (see ``step`` for its layout); rendering still happens on every
    step.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, seed=None, verbose=0):
        """Create the Box2D world and declare action/observation spaces.

        Args:
            seed: Seed forwarded to ``self.seed``; its hex form is stored in
                ``self.id``.
            verbose: When equal to 1, track generation prints progress and
                retry messages.
        """
        EzPickle.__init__(self)
        #self.contactListener_keepref = FrictionDetector(self)
        #self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref)
        self.world = Box2D.b2World((0, 0))
        self.id = self.seed(seed=seed)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.labels = []
        self.road = None
        self.car = None
        self.dt = 1.0 / FPS
        self.action = np.zeros((3, ))
        self.state = np.zeros((11, ))
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.track_width = TRACK_WIDTH
        # Reusable fixture definition; its vertices are overwritten for
        # every road tile created in _create_track().
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([1, 1, 1]),
                                       dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=(11, ),
                                            dtype=np.float32)
        #self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)

    def seed(self, seed=None):
        """Re-seed the environment's random generator.

        Args:
            seed: Seed forwarded to ``seeding.np_random``; ``None`` lets the
                helper pick one.

        Returns:
            The seed that was actually used, formatted with ``hex()``.
        """
        self.np_random, seed = seeding.np_random(seed)
        return hex(seed)

    def _destroy(self):
        """Destroy all road tile bodies and the car; no-op if no road exists."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and create one sensor body per tile.

        On success the track points are stored in ``self.track``, the tile
        bodies in ``self.road`` and the drawable quads are appended to
        ``self.road_poly`` (which the caller must have initialised).

        Returns:
            True if a well-closed loop was produced, False if generation
            failed and should be retried.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            # First and last checkpoints are pinned (no noise, fixed radius)
            # so that the start/finish area is always in the same place.
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        #self.road_poly = [([(tx,ty) for a,tx,ty in checkpoints], (0.7,0.7,0.9))]  # uncomment this to see checkpoints
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so the loop cannot hang
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha so the two angles stay comparable.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                # Negative indices wrap around, which is intended: the
                # track is a closed loop.
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            # track[i - 1] wraps to the last point for i == 0, closing the loop.
            _, beta1, x1, y1 = track[i]
            _, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # Every tile uses the same colour in this variant (no per-tile shade).
            t.color = ROAD_COLOR
            t.road_visited = False
            t.road_friction = ROAD_FRICTION
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r,
                                    road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l],
                                       (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Returns:
            The state vector returned by ``step(None)``.
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Track generation can fail; keep trying until it succeeds.
        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many of this messages)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)

    def step(self, action):
        """Advance the simulation by ``self.dt`` and refresh the state vector.

        State layout: ``[0:2]`` hull position, ``[2]`` hull angle shifted by
        pi/2 and wrapped to ``[0, 2*pi)``, ``[3:5]`` hull linear velocity,
        ``[5]`` hull angular velocity, ``[6]`` joint angle of wheel 0,
        ``[7:11]`` angular speed (``omega``) of wheels 0-3.

        Args:
            action: ``(steer, gas, brake)`` or ``None``.  ``None`` advances
                physics without applying controls; ``reset()`` uses it.

        Returns:
            ``self.state``.  NOTE: this is the same array object on every
            call and is updated in place, so callers that keep observations
            across steps must copy them.
        """
        if action is not None:
            self.action = np.array(action)
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(self.dt)
        self.world.Step(self.dt, 6 * 30, 2 * 30)
        self.t += self.dt

        self.render("state_pixels")

        # Update vehicle state
        self.state[0:2] = self.car.hull.position
        self.state[2] = (self.car.hull.angle + np.pi / 2) % (2 * np.pi)
        self.state[3:5] = self.car.hull.linearVelocity
        self.state[5] = self.car.hull.angularVelocity
        self.state[6] = self.car.wheels[0].joint.angle
        self.state[7] = self.car.wheels[0].omega
        self.state[8] = self.car.wheels[1].omega
        self.state[9] = self.car.wheels[2].omega
        self.state[10] = self.car.wheels[3].omega

        # NOTE: step_reward and done are computed but not returned by this
        # variant; only the self.reward / self.prev_reward bookkeeping and
        # the fuel reset are observable.
        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state

    def render(self, mode='human'):
        """Draw the current frame.

        Args:
            mode: One of ``'human'``, ``'state_pixels'`` or ``'rgb_array'``.

        Returns:
            ``None`` if ``reset()`` has not been called yet; for ``'human'``
            the viewer's ``isopen`` flag; otherwise a writable ``uint8``
            array of shape ``(height, width, 3)`` holding the RGB frame with
            the row order reversed relative to the raw buffer.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            # (text, font size, x, y) of every HUD caption.  The list is
            # rebuilt rather than appended to, so reopening the viewer after
            # close() does not draw each caption twice.
            label_specs = [
                ('Input', 15, WINDOW_W / 16 * 3, WINDOW_H / 12),
                ('S', 12, WINDOW_W / 64 * 7, WINDOW_H / 100 * 3),
                ('T', 12, WINDOW_W / 64 * 14, WINDOW_H / 100 * 3),
                ('B', 12, WINDOW_W / 64 * 17, WINDOW_H / 100 * 3),
                ('Linear Velocity', 15, WINDOW_W / 2, WINDOW_H / 12),
                ('FL FR', 12, WINDOW_W / 16 * 7, WINDOW_H / 100 * 3),
                ('RL RR', 12, WINDOW_W / 2, WINDOW_H / 100 * 3),
                ('C', 12, WINDOW_W / 64 * 37, WINDOW_H / 100 * 3),
                ('Angular Velocity', 15, WINDOW_W / 16 * 13, WINDOW_H / 12),
                ('C', 12, WINDOW_W / 16 * 13, WINDOW_H / 100 * 3),
            ]
            self.labels = [
                pyglet.text.Label(text,
                                  font_size=font_size,
                                  x=label_x,
                                  y=label_y,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255))
                for text, font_size, label_x, label_y in label_specs
            ]
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Zoom grows with t**5 from 1 and is clipped at ZOOM * SCALE.
        zoom = np.clip((ZOOM * SCALE - 1) * np.power(self.t, 5) + 1, 1,
                       ZOOM * SCALE)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car is moving, orient the camera along its velocity
        # instead of along the hull.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.fromstring's binary mode is deprecated; frombuffer is the
        # supported replacement.  The copy keeps the returned frame writable
        # and independent of pyglet's buffer, as it was with fromstring.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8).copy()
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]  # reverse the row order and drop alpha

        return arr

    def close(self):
        """Close the viewer window if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the background, the checker pattern and all road polygons."""
        gl.glBegin(gl.GL_QUADS)
        #gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glColor4f(0.75, 0.75, 0.75, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        #gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        gl.glColor4f(0.65, 0.65, 0.65, 1.0)
        k = PLAYFIELD / 20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for i, (poly, color) in enumerate(self.road_poly):
            # The polygon at index 2 is always drawn white, regardless of
            # its stored colour.
            if i == 2:
                gl.glColor4f(1, 1, 1, 1)
            else:
                gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the bottom HUD: input, wheel-speed and velocity bars plus captions.

        Args:
            W: Width used to scale the HUD horizontally.
            H: Height used to scale the HUD vertically.
        """
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0, 0, 0, 0.2)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, H / 10, 0)
        gl.glVertex3f(0, H / 10, 0)
        gl.glVertex3f(0, 0, 0)

        w = W / 100
        h = H / 100

        def ver_ind(place, val, color):
            # Vertical bar 3*w wide centred on x=place, rising h*val from 0.
            gl.glColor4f(color[0], color[1], color[2], color[3])
            gl.glVertex3f(place - 1.5 * w, h * val, 0)
            gl.glVertex3f(place + 1.5 * w, h * val, 0)
            gl.glVertex3f(place + 1.5 * w, 0, 0)
            gl.glVertex3f(place - 1.5 * w, 0, 0)

        def hor_ind(place, val, color):
            # Horizontal bar from x=place extending w*val (sign gives direction).
            gl.glColor4f(color[0], color[1], color[2], color[3])
            gl.glVertex3f(place, 5 * h, 0)
            gl.glVertex3f(place + w * val, 5 * h, 0)
            gl.glVertex3f(place + w * val, h, 0)
            gl.glVertex3f(place, h, 0)

        true_speed = np.linalg.norm(self.car.hull.linearVelocity)
        hor_ind(W / 64 * 7, 7 * self.action[0], (1, 1, 0, 0.7))
        ver_ind(W / 64 * 14, 6 * self.action[1], (0, 1, 0, 0.7))
        ver_ind(W / 64 * 17, 6 * self.action[2], (1, 0, 0, 0.7))
        ver_ind(W / 16 * 7 - 1.5 * w, 0.025 * self.car.wheels[0].omega,
                (0, 0.7, 1, 0.7))
        ver_ind(W / 16 * 7 + 1.5 * w, 0.025 * self.car.wheels[1].omega,
                (0, 0.7, 1, 0.7))
        ver_ind(W / 2 - 1.5 * w, 0.025 * self.car.wheels[2].omega,
                (0, 0.5, 1, 0.7))
        ver_ind(W / 2 + 1.5 * w, 0.025 * self.car.wheels[3].omega,
                (0, 0.5, 1, 0.8))
        ver_ind(W / 64 * 37, 0.05 * true_speed, (0, 0, 1, 0.7))
        hor_ind(W / 16 * 13, -1 * self.car.hull.angularVelocity,
                (0.5, 0, 1, 0.7))
        gl.glEnd()

        for label in self.labels:
            label.draw()
class CarRacing(gym.Env, EzPickle):
    """Top-down car racing environment with optional obstacles on the track.

    Actions are ``[steer, gas, brake]`` with steer in [-1, 1] and gas/brake
    in [0, 1].  Observations are declared as ``(STATE_H, STATE_W, 3)`` uint8
    images and are produced by ``self.SI.generate_image()``.

    Every step with an action costs 0.1 reward.  The episode ends when
    all tiles have been visited, when the car leaves the playfield
    (step reward -100) or, with ``obstacles=True``, when the car touches an
    obstacle (step reward -100).  Rewards for visiting tiles are not added in
    this class (presumably ``FrictionDetector`` does that -- confirm).

    Args:
        verbose: when 1, print track-generation diagnostics.
        obstacles: when true, place ``n_obstacles`` obstacles along the
            track and check for collisions every step.
    """

    metadata = {
        "render.modes": ["human", "rgb_array", "state_pixels"],
        "video.frames_per_second": FPS,
    }

    def __init__(self, verbose=1, obstacles=False):
        # Record the constructor arguments so that a pickled/unpickled copy
        # is rebuilt with the same configuration instead of the defaults.
        EzPickle.__init__(self, verbose=verbose, obstacles=obstacles)
        self.SI = SI(
            env=self,
            car_shape=(4, 8),
            image_shape=(STATE_W, STATE_H),
            render_distance=40,
            road_width=40 / 6,
            fill=True,
            interpolate=True,
            obstacles=obstacles,
        )
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref
        )
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.steps = 0
        self.n_obstacles = 10
        self.obstacles = obstacles
        self.dim_obstacles = (0.5, 0.5)
        # NOTE(review): the original comment read "1m distance between
        # obstacle and vehicle" although the value is 0.1 -- confirm units.
        self.collision_threshold = 0.1
        self.COLLISION = False
        # Template fixture for road tiles; its vertices are overwritten for
        # every tile in _create_track().  The placeholder shape is the square
        #   [(0, 0) | (1, 0)]
        #   [(0,-1) | (1,-1)]
        self.fd_tile = fixtureDef(
            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
        )
        self.action_space = spaces.Box(
            np.array([-1, 0, 0]), np.array([+1, +1, +1]), dtype=np.float32
        )  # steer, gas, brake
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
        )
        # 2-D rotation of the point (x, y) by `angle` radians.
        self.R = lambda x, y, angle: [
            x * np.cos(angle) - y * np.sin(angle),
            y * np.cos(angle) + x * np.sin(angle),
        ]

    def _create_obstacles(self):
        """Place ``n_obstacles`` quads at regular intervals along the track.

        Fills ``self.obstacle_poly`` (flat x, y, z list used for rendering)
        and ``self.obstacles_pos`` (array of shape ``(n_obstacles, 4, 2)``
        used for collision tests).

        Returns:
            True when exactly ``n_obstacles`` obstacles were placed,
            False otherwise.
        """
        # Number of track vertices between two consecutive obstacles.
        interval = (len(self.track) - 1) // self.n_obstacles

        # Build into fresh containers so that a failed earlier attempt can
        # never leak vertices into this one.
        poly = []
        positions = np.zeros((self.n_obstacles, 4, 2))
        count = 0
        half_l, half_r = self.dim_obstacles

        if interval > 0:
            for i in range(interval, len(self.track), interval):
                if count >= self.n_obstacles:
                    break
                if i <= 1:
                    continue
                _, beta1, x1, y1 = self.track[i]
                _, beta2, x2, y2 = self.track[i - 1]
                # Use the environment's seeded generator (not the global
                # numpy one) so that seed() makes obstacles reproducible.
                sign = 1 if self.np_random.uniform(0.0, 1.0) < 0.5 else -1
                road1_l = (
                    x1 - sign * half_l * math.cos(beta1),
                    y1 - sign * half_l * math.sin(beta1),
                )
                road1_r = (
                    x1 + sign * half_r * math.cos(beta1),
                    y1 + sign * half_r * math.sin(beta1),
                )
                road2_l = (
                    x2 - sign * half_l * math.cos(beta2),
                    y2 - sign * half_l * math.sin(beta2),
                )
                road2_r = (
                    x2 + sign * half_r * math.cos(beta2),
                    y2 + sign * half_r * math.sin(beta2),
                )
                corners = [road1_l, road1_r, road2_r, road2_l]
                for cx, cy in corners:
                    poly.extend([cx, cy, 0])
                positions[count] = corners
                count += 1

        self.obstacle_poly = poly
        self.obstacles_pos = positions

        if count == self.n_obstacles:
            return True
        print('There was a problem generating the obstacle course')
        return False

    def _create_track(self):
        """Generate a random closed track and create its Box2D tiles.

        Random checkpoints are placed around a circle, a path is steered
        from checkpoint to checkpoint for several laps, and one closed lap
        is cut out of that path.  On success ``self.track``, ``self.road``
        and ``self.road_poly`` are populated.

        Returns:
            True on success, False when no well-closed loop was found (the
            caller is expected to retry).
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha))
            )
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so the loop cannot hang
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)
                    ]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored,
        # second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (
                track[i][0] > self.start_alpha
                and track[i - 1][0] <= self.start_alpha
            )
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print(
                "Track generation: %i..%i -> %i-tiles track"
                % (i1, i2, i2 - i1)
            )
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]
        if not track:
            # Degenerate loop: ask the caller to retry instead of raising
            # IndexError on track[0] below.
            return False

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2]))
            + np.square(first_perp_y * (track[0][3] - track[-1][3]))
        )
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)  # slight shade variation between tiles
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(
                ([road1_l, road1_r, road2_r, road2_l], t.color)
            )
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(
                    (
                        [b1_l, b1_r, b2_r, b2_l],
                        (1, 1, 1) if i % 2 == 0 else (1, 0, 0),
                    )
                )
        self.track = track
        return True

    def seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy_tiles(self):
        """Remove every road tile body from the Box2D world."""
        for tile in self.road or ():
            self.world.DestroyBody(tile)
        self.road = []

    def _destroy(self):
        """Destroy the road tiles and the car of the previous episode."""
        if not self.road:
            return
        self._destroy_tiles()
        self.car.destroy()

    def reset(self):
        """Start a new episode on a freshly generated track.

        Returns:
            The first observation (result of one step without an action).
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.obstacle_poly = []
        self.steps = 0

        while True:
            track_ok = self._create_track()
            if track_ok:
                if not self.obstacles or self._create_obstacles():
                    break
                # The track was built but obstacles could not be placed:
                # discard the tiles so the retry starts from a clean world.
                self._destroy_tiles()
                self.road_poly = []
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many "
                    "instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``[steer, gas, brake]`` or None (used by reset(); no
                control is applied and no reward is computed).

        Returns:
            ``(state, step_reward, done, info)`` with an empty info dict.
        """
        self.steps += 1
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.SI.generate_image()

        step_reward = 0
        done = False

        if self.obstacles:
            self.collision()
        else:
            self.COLLISION = False

        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be
            # faster.
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100
            if self.COLLISION:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode="human"):
        """Render the scene with pyglet.

        Args:
            mode: "human" shows the window and returns whether it is still
                open; "rgb_array" and "state_pixels" return an
                ``(height, width, 3)`` uint8 array.

        Returns:
            None when reset() has not been called yet, otherwise see `mode`.
        """
        assert mode in ["human", "state_pixels", "rgb_array"]
        if self.viewer is None:
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                "0000",
                font_size=36,
                x=20,
                y=WINDOW_H * 2.5 / 40.00,
                anchor_x="left",
                anchor_y="center",
                color=(255, 255, 255, 255),
            )
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second:
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            # Once moving, orient the view along the velocity vector.
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2
            - (scroll_x * zoom * math.cos(angle)
               - scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4
            - (scroll_x * zoom * math.sin(angle)
               + scroll_y * zoom * math.cos(angle)),
        )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()
        win.clear()
        t = self.transform

        if mode == "rgb_array":
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == "state_pixels":
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, "_nscontext"):
                pixel_scale = (
                    win.context._nscontext.view().backingScaleFactor()
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        if self.obstacles:
            self.render_obstacles()
            self.render_collision()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == "human":
            win.flip()
            return self.viewer.isopen

        image_data = (
            pyglet.image.get_buffer_manager()
            .get_color_buffer()
            .get_image_data()
        )
        # np.fromstring's binary mode is deprecated; frombuffer is the
        # documented replacement.  The final copy gives callers a writable
        # array that does not alias pyglet's buffer.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        arr = arr.reshape(VP_H, VP_W, 4)
        return arr[::-1, :, 0:3].copy()

    def close(self):
        """Close the viewer window, if any."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the grass background, grass squares and road polygons."""
        colors = [0.4, 0.8, 0.4, 1.0] * 4
        polygons_ = [
            +PLAYFIELD, +PLAYFIELD, 0,
            +PLAYFIELD, -PLAYFIELD, 0,
            -PLAYFIELD, -PLAYFIELD, 0,
            -PLAYFIELD, +PLAYFIELD, 0,
        ]

        k = PLAYFIELD / 20.0
        # 20 x 20 lighter squares, four RGBA vertices each.
        colors.extend([0.4, 0.9, 0.4, 1.0] * 4 * 20 * 20)
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                polygons_.extend([
                    k * x + k, k * y + 0, 0,
                    k * x + 0, k * y + 0, 0,
                    k * x + 0, k * y + k, 0,
                    k * x + k, k * y + k, 0,
                ])

        for poly, color in self.road_poly:
            colors.extend([color[0], color[1], color[2], 1] * len(poly))
            for p in poly:
                polygons_.extend([p[0], p[1], 0])

        # Three floats per vertex, hence the // 3.
        vl = pyglet.graphics.vertex_list(
            len(polygons_) // 3, ("v3f", polygons_), ("c4f", colors)
        )
        vl.draw(gl.GL_QUADS)
        # A new list is built every frame; release it so they do not pile up.
        vl.delete()

    def render_indicators(self, W, H):
        """Draw the HUD: speed, wheel speeds, steering, rotation and score."""
        s = W / 40.0
        h = H / 40.0
        colors = [0, 0, 0, 1] * 4
        polygons = [W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0]

        def vertical_ind(place, val, color):
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend([
                place * s, h + h * val, 0,
                (place + 1) * s, h + h * val, 0,
                (place + 1) * s, h, 0,
                (place + 0) * s, h, 0,
            ])

        def horiz_ind(place, val, color):
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend([
                (place + 0) * s, 4 * h, 0,
                (place + val) * s, 4 * h, 0,
                (place + val) * s, 2 * h, 0,
                (place + 0) * s, 2 * h, 0,
            ])

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0])
            + np.square(self.car.hull.linearVelocity[1])
        )

        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        # ABS sensors
        vertical_ind(7, 0.01 * self.car.wheels[0].omega, (0.0, 0, 1))
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        vl = pyglet.graphics.vertex_list(
            len(polygons) // 3, ("v3f", polygons), ("c4f", colors)
        )
        vl.draw(gl.GL_QUADS)
        vl.delete()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()

    def render_obstacles(self):
        """Draw every obstacle quad in red."""
        # Three floats (x, y, z) per vertex.
        n_vertices = len(self.obstacle_poly) // 3
        # One RGB byte triple per vertex.
        vertex_colors = [255, 5, 5] * n_vertices
        v2 = pyglet.graphics.vertex_list(
            n_vertices, ('v3f', self.obstacle_poly), ('c3B', vertex_colors)
        )
        v2.draw(gl.GL_QUADS)
        v2.delete()

    def _hull_corners(self):
        """Return the corners of the car's bounding box in world coordinates.

        The box spans (+-2, +-3) in the hull frame; corners are returned as
        ``[(x, y), ...]`` in the order (2, 3), (-2, 3), (-2, -3), (2, -3).
        """
        x, y = self.car.hull.position
        angle = self.car.hull.angle
        corners = []
        for local_x, local_y in ((2, 3), (-2, 3), (-2, -3), (2, -3)):
            rx, ry = self.R(local_x, local_y, angle)
            corners.append((rx + x, ry + y))
        return corners

    def render_collision(self):
        """Overlay a white quad on the car while a collision is flagged."""
        if not self.COLLISION:
            return
        V = []
        for cx, cy in self._hull_corners():
            V.extend([cx, cy, 0])
        C = [255, 255, 255] * 4
        v3 = pyglet.graphics.vertex_list(4, ('v3f', V), ('c3B', C))
        v3.draw(gl.GL_QUADS)
        v3.delete()

    def collision(self):
        """Set ``self.COLLISION`` from the car/obstacle distances.

        The four corners of the car's bounding box are compared against
        every obstacle vertex and against the midpoints of the two obstacle
        edges that lie across the track (vertices 0-1 and 2-3).  Midpoint
        distances are halved before the comparison, as in the original code,
        which makes the midpoint test twice as permissive as the vertex test.
        """
        corners = np.array(self._hull_corners())  # (4, 2)
        verts = self.obstacles_pos  # (n_obstacles, 4, 2)

        # Distance from every car corner to every obstacle vertex.
        delta = corners[:, None, None, :] - verts[None, :, :, :]
        vertex_dist = np.hypot(delta[..., 0], delta[..., 1])

        # Midpoints of both cross-track edges.  The original computed the
        # 2-3 midpoint twice and never looked at the 0-1 edge.
        midpoints = np.stack(
            [
                (verts[:, 0] + verts[:, 1]) / 2,
                (verts[:, 2] + verts[:, 3]) / 2,
            ],
            axis=1,
        )  # (n_obstacles, 2, 2)
        mid_delta = corners[:, None, None, :] - midpoints[None, :, :, :]
        midpoint_dist = np.hypot(mid_delta[..., 0], mid_delta[..., 1]) * 0.5

        smallest_distance = min(vertex_dist.min(), midpoint_dist.min())
        self.COLLISION = bool(smallest_distance < self.collision_threshold)
class CarRacing(gym.Env, EzPickle):
    """
    ### Description
    The easiest continuous control task to learn from pixels - a top-down
    racing environment. Discrete control is reasonable in this environment as
    well; on/off discretization is fine.

    The game is solved when the agent consistently gets 900+ points.
    The generated track is random every episode.

    Some indicators are shown at the bottom of the window along with the
    state RGB buffer. From left to right: true speed, four ABS sensors,
    steering wheel position, and gyroscope.
    To play yourself (it's rather fast for humans), type:
    ```
    python gym/envs/box2d/car_racing.py
    ```
    Remember: it's a powerful rear-wheel drive car - don't press the
    accelerator and turn at the same time.

    ### Action Space
    There are 3 actions: steering (-1 is full left, +1 is full right), gas,
    and braking.

    ### Observation Space
    State consists of 96x96 pixels.

    ### Rewards
    The reward is -0.1 every frame and +1000/N for every track tile visited,
    where N is the total number of tiles visited in the track. For example,
    if you have finished in 732 frames, your reward is
    1000 - 0.1*732 = 926.8 points.

    ### Starting State
    The car starts at rest in the center of the road.

    ### Episode Termination
    The episode finishes when all of the tiles are visited or when
    `new_lap` has been set (presumably by the contact listener once a lap
    counts as complete). The car can also go outside of the playfield -
    that is, far off the track, in which case it will receive -100 reward
    and die.

    ### Arguments
    - `verbose`: when 1, track-generation diagnostics are printed.
    - `lap_complete_percent`: forwarded to `FrictionDetector`; presumably
      the fraction of tiles that must be visited before a lap counts as
      complete (confirm against `FrictionDetector`).

    ### Version History
    - v0: Current version

    ### References
    - Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.

    ### Credits
    Created by Oleg Klimov
    """

    metadata = {
        "render_modes": ["human", "rgb_array", "state_pixels"],
        "render_fps": FPS,
    }

    def __init__(self, verbose=1, lap_complete_percent=0.95):
        # Record the constructor arguments so that a pickled/unpickled copy
        # is rebuilt with the same configuration instead of the defaults.
        EzPickle.__init__(self, verbose, lap_complete_percent)
        pygame.init()
        self.contactListener_keepref = FrictionDetector(
            self, lap_complete_percent
        )
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref
        )
        self.screen = None
        self.clock = None
        self.isopen = True
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.new_lap = False
        # Template fixture for road tiles; its vertices are overwritten for
        # every tile in _create_track().
        self.fd_tile = fixtureDef(
            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
        )

        # This will throw a warning in tests/envs/test_envs in
        # utils/env_checker.py as the space is not symmetric or normalised
        # however this is not possible here so ignore
        self.action_space = spaces.Box(
            np.array([-1, 0, 0]).astype(np.float32),
            np.array([+1, +1, +1]).astype(np.float32),
        )  # steer, gas, brake

        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
        )

    def _destroy(self):
        """Destroy the road tiles and the car of the previous episode."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and create its Box2D tiles.

        Random checkpoints are placed around a circle, a path is steered
        from checkpoint to checkpoint for several laps, and one closed lap
        is cut out of that path.  On success ``self.track``, ``self.road``
        and ``self.road_poly`` are populated.

        Returns:
            True on success, False when no well-closed loop was found (the
            caller is expected to retry).
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha))
            )
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so the loop cannot hang
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)
                    ]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored,
        # second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (
                track[i][0] > self.start_alpha
                and track[i - 1][0] <= self.start_alpha
            )
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print(
                "Track generation: %i..%i -> %i-tiles track"
                % (i1, i2, i2 - i1)
            )
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]
        if not track:
            # Degenerate loop: ask the caller to retry instead of raising
            # IndexError on track[0] below.
            return False

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2]))
            + np.square(first_perp_y * (track[0][3] - track[-1][3]))
        )
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)  # slight shade variation between tiles
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.idx = i
            t.fixtures[0].sensor = True
            self.road_poly.append(
                ([road1_l, road1_r, road2_r, road2_l], t.color)
            )
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(
                    (
                        [b1_l, b1_r, b2_r, b2_l],
                        (1, 1, 1) if i % 2 == 0 else (1, 0, 0),
                    )
                )
        self.track = track
        return True

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        return_info: bool = False,
        options: Optional[dict] = None,
    ):
        """Start a new episode on a freshly generated track.

        Args:
            seed: forwarded to ``gym.Env.reset``.
            return_info: when true, return ``(observation, {})`` instead of
                the observation alone.
            options: accepted for API compatibility; not used here.

        Returns:
            The first observation, optionally paired with an empty info dict.
        """
        super().reset(seed=seed)
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.new_lap = False
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many "
                    "instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        observation = self.step(None)[0]
        if not return_info:
            return observation
        return observation, {}

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``[steer, gas, brake]`` or None (used by reset(); no
                control is applied and no reward is computed).

        Returns:
            ``(state, step_reward, done, info)`` with an empty info dict.
        """
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be
            # faster.
            # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track) or self.new_lap:
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode="human"):
        """Render the scene with pygame.

        Args:
            mode: "human" shows the window and returns ``self.isopen``;
                "rgb_array" and "state_pixels" return an image array scaled
                to the video / state resolution.

        Returns:
            None when reset() has not been called yet, otherwise see `mode`.
        """
        assert mode in ["human", "state_pixels", "rgb_array"]
        if self.screen is None and mode == "human":
            pygame.display.init()
            self.screen = pygame.display.set_mode((WINDOW_W, WINDOW_H))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        self.surf = pygame.Surface((WINDOW_W, WINDOW_H))

        # computing transformations
        angle = -self.car.hull.angle
        # Animating first second zoom.
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = -(self.car.hull.position[0] + PLAYFIELD) * zoom
        scroll_y = -(self.car.hull.position[1] + PLAYFIELD) * zoom
        trans = pygame.math.Vector2((scroll_x, scroll_y)).rotate_rad(angle)
        trans = (WINDOW_W / 2 + trans[0], WINDOW_H / 4 + trans[1])

        self.render_road(zoom, trans, angle)
        self.car.draw(self.surf, zoom, trans, angle, mode != "state_pixels")

        # The world is drawn y-up; flip before adding screen-space overlays.
        self.surf = pygame.transform.flip(self.surf, False, True)

        # showing stats
        self.render_indicators(WINDOW_W, WINDOW_H)

        font = pygame.font.Font(pygame.font.get_default_font(), 42)
        text = font.render(
            "%04i" % self.reward, True, (255, 255, 255), (0, 0, 0)
        )
        text_rect = text.get_rect()
        text_rect.center = (60, WINDOW_H - WINDOW_H * 2.5 / 40.0)
        self.surf.blit(text, text_rect)

        if mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            self.screen.fill(0)
            self.screen.blit(self.surf, (0, 0))
            pygame.display.flip()

        if mode == "rgb_array":
            return self._create_image_array(self.surf, (VIDEO_W, VIDEO_H))
        elif mode == "state_pixels":
            return self._create_image_array(self.surf, (STATE_W, STATE_H))
        else:
            return self.isopen

    def render_road(self, zoom, translation, angle):
        """Draw the grass background, grass squares and road polygons."""
        bounds = PLAYFIELD
        field = [
            (2 * bounds, 2 * bounds),
            (2 * bounds, 0),
            (0, 0),
            (0, 2 * bounds),
        ]
        self.draw_colored_polygon(
            self.surf, field, (102, 204, 102), zoom, translation, angle
        )

        # Lighter grass squares of side k on every second cell.
        k = bounds / (20.0)
        for x in range(0, 40, 2):
            for y in range(0, 40, 2):
                square = [
                    (k * x + k, k * y + 0),
                    (k * x + 0, k * y + 0),
                    (k * x + 0, k * y + k),
                    (k * x + k, k * y + k),
                ]
                self.draw_colored_polygon(
                    self.surf, square, (102, 230, 102), zoom, translation, angle
                )

        for poly, color in self.road_poly:
            # converting to pixel coordinates
            poly = [(p[0] + PLAYFIELD, p[1] + PLAYFIELD) for p in poly]
            color = [int(c * 255) for c in color]
            self.draw_colored_polygon(
                self.surf, poly, color, zoom, translation, angle
            )

    def render_indicators(self, W, H):
        """Draw the HUD bars (speed, wheels, steering, rotation)."""
        s = W / 40.0
        h = H / 40.0
        color = (0, 0, 0)
        polygon = [(W, H), (W, H - 5 * h), (0, H - 5 * h), (0, H)]
        pygame.draw.polygon(self.surf, color=color, points=polygon)

        def vertical_ind(place, val):
            return [
                (place * s, H - (h + h * val)),
                ((place + 1) * s, H - (h + h * val)),
                ((place + 1) * s, H - h),
                ((place + 0) * s, H - h),
            ]

        def horiz_ind(place, val):
            return [
                ((place + 0) * s, H - 4 * h),
                ((place + val) * s, H - 4 * h),
                ((place + val) * s, H - 2 * h),
                ((place + 0) * s, H - 2 * h),
            ]

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0])
            + np.square(self.car.hull.linearVelocity[1])
        )

        # simple wrapper to render if the indicator value is above a threshold
        def render_if_min(value, points, color):
            if abs(value) > 1e-4:
                pygame.draw.polygon(self.surf, points=points, color=color)

        render_if_min(
            true_speed, vertical_ind(5, 0.02 * true_speed), (255, 255, 255)
        )
        # ABS sensors
        render_if_min(
            self.car.wheels[0].omega,
            vertical_ind(7, 0.01 * self.car.wheels[0].omega),
            (0, 0, 255),
        )
        render_if_min(
            self.car.wheels[1].omega,
            vertical_ind(8, 0.01 * self.car.wheels[1].omega),
            (0, 0, 255),
        )
        render_if_min(
            self.car.wheels[2].omega,
            vertical_ind(9, 0.01 * self.car.wheels[2].omega),
            (51, 0, 255),
        )
        render_if_min(
            self.car.wheels[3].omega,
            vertical_ind(10, 0.01 * self.car.wheels[3].omega),
            (51, 0, 255),
        )
        render_if_min(
            self.car.wheels[0].joint.angle,
            horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle),
            (0, 255, 0),
        )
        render_if_min(
            self.car.hull.angularVelocity,
            horiz_ind(30, -0.8 * self.car.hull.angularVelocity),
            (255, 0, 0),
        )

    def draw_colored_polygon(self, surface, poly, color, zoom, translation, angle):
        """Rotate, scale and translate `poly`, then draw it filled on `surface`.

        Args:
            surface: pygame surface to draw on.
            poly: sequence of (x, y) points.
            color: RGB colour.
            zoom: scale factor applied after rotation.
            translation: (dx, dy) offset applied after scaling.
            angle: rotation in radians.
        """
        poly = [pygame.math.Vector2(c).rotate_rad(angle) for c in poly]
        poly = [
            (c[0] * zoom + translation[0], c[1] * zoom + translation[1])
            for c in poly
        ]
        # Draw on the surface that was passed in; the original always used
        # self.surf and silently ignored this parameter.
        gfxdraw.aapolygon(surface, poly, color)
        gfxdraw.filled_polygon(surface, poly, color)

    def _create_image_array(self, screen, size):
        """Scale `screen` to `size` and return it as a (rows, cols, 3) array."""
        scaled_screen = pygame.transform.smoothscale(screen, size)
        # pixels3d is indexed (x, y, channel); swap to (y, x, channel).
        return np.transpose(
            np.array(pygame.surfarray.pixels3d(scaled_screen)), axes=(1, 0, 2)
        )

    def close(self):
        """Shut pygame down and mark the window as closed."""
        pygame.quit()
        if self.screen is not None:
            pygame.display.quit()
            self.isopen = False