class CarRacing(gym.Env):
    """Top-down car racing environment on a randomly generated closed track.

    Actions are ``[steer, gas, brake]`` (see ``action_space``); observations
    are ``STATE_H x STATE_W`` RGB frames rendered by :meth:`render` in
    ``'state_pixels'`` mode. Every step taken with an action costs 0.1
    reward. An episode ends when ``tile_visited_count`` equals the number of
    track tiles, or when the car leaves the ``PLAYFIELD`` square (in which
    case the step reward is -100).
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self):
        """Create the physics world and declare action/observation spaces."""
        self.seed()
        # NOTE(review): the attribute name suggests the listener must be kept
        # referenced from Python while the world uses it -- confirm against
        # the Box2D bindings.
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.action_space = spaces.Box(
            np.array([-1, 0, 0]), np.array([+1, +1, +1]),
            dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)

    def seed(self, seed=None):
        """(Re)create ``self.np_random`` and return the seed in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Destroy all road tile bodies and the car; no-op without a road."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and build its road tiles.

        Appends to ``self.road_poly`` (which must already exist, see
        :meth:`reset`) and sets ``self.road``, ``self.start_alpha`` and, on
        success, ``self.track`` (a list of ``(alpha, beta, x, y)`` tuples).

        Returns:
            True when a track was built; False when no usable closed loop
            was found and the caller should try again.
        """
        CHECKPOINTS = 12

        # Create checkpoints: one per angular sector, at a random radius.
        # The first and the last checkpoint are pinned to 1.5 * TRACK_RAD.
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        # self.road_poly = [ (    # uncomment this to see checkpoints
        #     [ (tx,ty) for a,tx,ty in checkpoints ],
        #     (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of generated points
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # Wrapped past the last checkpoint: retry one turn earlier.
                alpha -= 2 * math.pi
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad
            proj = r1x * dest_dx + r1y * dest_dy
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, limited by TRACK_TURN_RATE.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored,
        # second is OK. The scan runs backwards from the end of the track.
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        # print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2-i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns: a tile qualifies when the last
        # BORDER_MIN_COUNT heading changes are all large and of the same sign.
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend each border backwards over the tiles that led up to it.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles: one quad between each point and its predecessor
        # (index -1 wraps around, closing the loop).
        for i in range(len(track)):
            _, beta1, x1, y1 = track[i]
            _, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            t = self.world.CreateStaticBody(fixtures=fixtureDef(
                shape=polygonShape(
                    vertices=[road1_l, road1_r, road2_r, road2_l])))
            t.userData = t
            c = 0.01 * (i % 3)  # slight per-tile shade variation
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l],
                                   t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                # Alternate white and red segments.
                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l],
                                       (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a fresh track and return the first frame."""
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        # Track generation can fail; retry until it succeeds.
        while True:
            success = self._create_track()
            if success:
                break
            # print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by ``1 / FPS`` seconds.

        Args:
            action: ``[steer, gas, brake]`` or None. None applies no
                controls and skips all reward/termination logic (this is
                how :meth:`reset` obtains the first observation).

        Returns:
            ``(state, step_reward, done, info)`` where ``state`` is the
            ``'state_pixels'`` frame and ``info`` is an empty dict.
        """
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Draw the scene and, for pixel modes, read it back as an array.

        Args:
            mode: ``'human'``, ``'rgb_array'`` or ``'state_pixels'``.

        Returns:
            None in ``'human'`` mode or when :meth:`reset` has not been
            called yet; otherwise a uint8 array of shape
            ``(VIDEO_H, VIDEO_W, 3)`` for ``'rgb_array'`` or
            ``(STATE_H, STATE_W, 3)`` for ``'state_pixels'``.
        """
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                '0000', font_size=36,
                x=20, y=WINDOW_H * 2.5 / 40.00,
                anchor_x='left', anchor_y='center',
                color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        zoom = ZOOM * SCALE
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car moves, orient the camera along its velocity instead
        # of the hull angle.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        if mode != 'state_pixels':
            win.switch_to()
            win.dispatch_events()
        if mode == "rgb_array" or mode == "state_pixels":
            win.clear()
            t = self.transform
            if mode == 'rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = STATE_W
                VP_H = STATE_H
            gl.glViewport(0, 0, VP_W, VP_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self.render_indicators(WINDOW_W,
                                   WINDOW_H)  # TODO: find why 2x needed
            image_data = pyglet.image.get_buffer_manager().get_color_buffer(
            ).get_image_data()
            # np.fromstring's binary mode (sep='') is deprecated in NumPy;
            # frombuffer is the documented replacement. copy() keeps the old
            # behaviour of returning a fresh array that owns its data.
            arr = np.frombuffer(image_data.data, dtype=np.uint8).copy()
            arr = arr.reshape(VP_H, VP_W, 4)
            arr = arr[::-1, :, 0:3]  # flip rows, drop the 4th channel

        # agent can call or not call env.render() itself when recording video.
        if mode == "rgb_array" and not self.human_render:
            win.flip()

        if mode == 'human':
            self.human_render = True
            win.clear()
            t = self.transform
            gl.glViewport(0, 0, WINDOW_W, WINDOW_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            self.render_indicators(WINDOW_W, WINDOW_H)
            win.flip()

        self.viewer.onetime_geoms = []
        return arr

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the playfield background, its checker pattern and the road."""
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD / 20.0
        # Lighter squares on every second cell of a 40x40 grid.
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the bottom status bar (speed, wheels, steering, score).

        Args:
            W: width used to scale the indicator layout.
            H: height used to scale the indicator layout.
        """
        gl.glBegin(gl.GL_QUADS)
        s = W / 40.0
        h = H / 40.0
        # Black strip behind the indicators.
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            # Vertical bar in column `place`, height proportional to `val`.
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            # Horizontal bar starting at `place`, signed length `val`.
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
class CarRacing(gym.Env, EzPickle):
    """Car racing environment variant with a fixed set of track checkpoints.

    The checkpoints are hard-coded in ``__init__`` and track generation uses
    no random numbers, so every reset builds its track from the same
    checkpoints. Turn borders and the on-screen indicators are disabled in
    this variant (their code is kept as comments).

    Actions are ``[steer, gas, brake]``; observations are
    ``STATE_H x STATE_W`` RGB frames. Every step taken with an action costs
    0.1 reward. An episode ends when ``tile_visited_count`` equals the
    number of track tiles, or when the car leaves the ``PLAYFIELD`` square
    (step reward -100).
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, verbose=1):
        """Create the physics world, spaces and the fixed checkpoint list.

        Args:
            verbose: when equal to 1, track generation progress is printed.
        """
        EzPickle.__init__(self)
        self.seed()
        # NOTE(review): the attribute name suggests the listener must be kept
        # referenced from Python while the world uses it -- confirm against
        # the Box2D bindings.
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        # Reusable fixture definition; its vertices are overwritten for each
        # tile in _create_track().
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(
            np.array([-1, 0, 0]), np.array([+1, +1, +1]),
            dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)

        CHECKPOINTS = 12

        # Create checkpoints as (alpha, x, y) tuples.
        # TODO Use a real way to keep a constant track across training runs
        self.checkpoints = [
            (0, 225.0, 0.0),
            (0.7825323624208509, 89.6427647192468, 89.1304349066121),
            (1.5543323243350344, 1.783985395462951, 108.34693482727236),
            (1.6057305460922464, -2.2173644459530517, 63.446740574663174),
            (2.6175081644916047, -58.76396976672586, 33.965461046114534),
            (2.7871461118931458, -134.63944761816262, 49.82679389320398),
            (3.414113547480756, -106.41825645850612, -29.741137759708423),
            (3.8745797378794, -77.61403468584427, -69.87679530100709),
            (4.193711736042842, -33.56139367373087, -58.79641863577176),
            (4.928823629511352, 29.852520836123745, -135.76810358020867),
            (5.29734709463665, 68.99766439052978, -104.18233435806235),
            (5.759586531581287, 194.85571585149864, -112.5000000000001),
        ]
        self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS

        # Disabled: random checkpoint generation.
        # self.checkpoints = []
        # for c in range(CHECKPOINTS):
        #     alpha = 2*math.pi*c/CHECKPOINTS + self.np_random.uniform(0, 2*math.pi*1/CHECKPOINTS)
        #     rad = self.np_random.uniform(TRACK_RAD/3, TRACK_RAD)
        #     if c==0:
        #         alpha = 0
        #         rad = 1.5*TRACK_RAD
        #     if c==CHECKPOINTS-1:
        #         alpha = 2*math.pi*c/CHECKPOINTS
        #         rad = 1.5*TRACK_RAD
        #     self.checkpoints.append( (alpha, rad*math.cos(alpha), rad*math.sin(alpha)) )

    def seed(self, seed=None):
        """(Re)create ``self.np_random`` and return the seed in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Destroy all road tile bodies and the car; no-op without a road."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Build the closed track through ``self.checkpoints``.

        Appends to ``self.road_poly`` (which must already exist, see
        :meth:`reset`) and sets ``self.road`` and, on success,
        ``self.track`` (a list of ``(alpha, beta, x, y)`` tuples).

        Returns:
            True when a track was built; False when no usable closed loop
            was found.
        """
        checkpoints = self.checkpoints

        # self.road_poly = [ (    # uncomment this to see checkpoints
        #     [ (tx,ty) for a,tx,ty in checkpoints ],
        #     (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of generated points
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # Wrapped past the last checkpoint: retry one turn earlier.
                alpha -= 2 * math.pi
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad
            proj = r1x * dest_dx + r1y * dest_dy
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, limited by TRACK_TURN_RATE.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored,
        # second is OK. The scan runs backwards from the end of the track.
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Disabled: red-white border on hard turns.
        # border = [False]*len(track)
        # for i in range(len(track)):
        #     good = True
        #     oneside = 0
        #     for neg in range(BORDER_MIN_COUNT):
        #         beta1 = track[i-neg-0][1]
        #         beta2 = track[i-neg-1][1]
        #         good &= abs(beta1 - beta2) > TRACK_TURN_RATE*0.2
        #         oneside += np.sign(beta1 - beta2)
        #     good &= abs(oneside) == BORDER_MIN_COUNT
        #     border[i] = good
        # for i in range(len(track)):
        #     for neg in range(BORDER_MIN_COUNT):
        #         border[i-neg] |= border[i]

        # Create tiles: one quad between each point and its predecessor
        # (index -1 wraps around, closing the loop).
        for i in range(len(track)):
            _, beta1, x1, y1 = track[i]
            _, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)  # slight per-tile shade variation
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l],
                                   t.color))
            self.road.append(t)
            # Disabled: border polygons for hard turns.
            # if border[i]:
            #     side = np.sign(beta2 - beta1)
            #     b1_l = (x1 + side* TRACK_WIDTH *math.cos(beta1), y1 + side* TRACK_WIDTH *math.sin(beta1))
            #     b1_r = (x1 + side*(TRACK_WIDTH+BORDER)*math.cos(beta1), y1 + side*(TRACK_WIDTH+BORDER)*math.sin(beta1))
            #     b2_l = (x2 + side* TRACK_WIDTH *math.cos(beta2), y2 + side* TRACK_WIDTH *math.sin(beta2))
            #     b2_r = (x2 + side*(TRACK_WIDTH+BORDER)*math.cos(beta2), y2 + side*(TRACK_WIDTH+BORDER)*math.sin(beta2))
            #     self.road_poly.append(( [b1_l, b1_r, b2_r, b2_l], (1,1,1) if i%2==0 else (1,0,0) ))
        self.track = track
        return True

    def reset(self):
        """Start a new episode and return the first observation frame."""
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Retry until track generation reports success.
        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many of this messages)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by ``1 / FPS`` seconds.

        Args:
            action: ``[steer, gas, brake]`` or None. None applies no
                controls and skips all reward/termination logic (this is
                how :meth:`reset` obtains the first observation).

        Returns:
            ``(state, step_reward, done, info)`` where ``state`` is the
            ``'state_pixels'`` frame and ``info`` is an empty dict.
        """
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100
            # TODO: Exit if far off track

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Draw the scene and, for pixel modes, read it back as an array.

        Args:
            mode: ``'human'``, ``'rgb_array'`` or ``'state_pixels'``.

        Returns:
            None when :meth:`reset` has not been called yet;
            ``self.viewer.isopen`` in ``'human'`` mode; otherwise a uint8
            array of shape ``(VIDEO_H, VIDEO_W, 3)`` for ``'rgb_array'`` or
            ``(STATE_H, STATE_W, 3)`` for ``'state_pixels'``.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                '0000', font_size=36,
                x=20, y=WINDOW_H * 2.5 / 40.00,
                anchor_x='left', anchor_y='center',
                color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        # Disabled: orient the camera along the velocity once the car moves.
        # vel = self.car.hull.linearVelocity
        # if np.linalg.norm(vel) > 0.5:
        #     angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            # Scale the viewport by the backing scale factor when the context
            # exposes one (the `_nscontext` attribute).
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view(
                ).backingScaleFactor()  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        # self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.fromstring's binary mode (sep='') is deprecated in NumPy;
        # frombuffer is the documented replacement. copy() keeps the old
        # behaviour of returning a fresh array that owns its data.
        arr = np.frombuffer(image_data.data, dtype=np.uint8).copy()
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]  # flip rows, drop the 4th channel

        return arr

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the playfield background and the road polygons."""
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        # Disabled: lighter checker squares on the background.
        # k = PLAYFIELD / 20.0
        # for x in range(-20, 20, 2):
        #     for y in range(-20, 20, 2):
        #         gl.glVertex3f(k*x + k, k*y + 0, 0)
        #         gl.glVertex3f(k*x + 0, k*y + 0, 0)
        #         gl.glVertex3f(k*x + 0, k*y + k, 0)
        #         gl.glVertex3f(k*x + k, k*y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the bottom status bar (speed, wheels, steering, score).

        Not called by :meth:`render` in this variant (the call is commented
        out there).

        Args:
            W: width used to scale the indicator layout.
            H: height used to scale the indicator layout.
        """
        gl.glBegin(gl.GL_QUADS)
        s = W / 40.0
        h = H / 40.0
        # Black strip behind the indicators.
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            # Vertical bar in column `place`, height proportional to `val`.
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            # Horizontal bar starting at `place`, signed length `val`.
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
class CarRacing(gym.Env, EzPickle):
    """Top-down car racing environment on a randomly generated closed track.

    Actions are ``[steer, gas, brake]`` (see ``action_space``); observations
    are ``STATE_H x STATE_W`` RGB frames rendered by :meth:`render` in
    ``'state_pixels'`` mode. Every step taken with an action costs 0.1
    reward. An episode ends when ``tile_visited_count`` equals the number of
    track tiles, or when the car leaves the ``PLAYFIELD`` square (in which
    case the step reward is -100).
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, verbose=1):
        """Create the physics world and declare action/observation spaces.

        Args:
            verbose: when equal to 1, track generation progress is printed.
        """
        EzPickle.__init__(self)
        self.seed()
        # NOTE(review): the attribute name suggests the listener must be kept
        # referenced from Python while the world uses it -- confirm against
        # the Box2D bindings.
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose

        self.action_space = spaces.Box(
            np.array([-1, 0, 0]), np.array([+1, +1, +1]),
            dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)

    def seed(self, seed=None):
        """(Re)create ``self.np_random`` and return the seed in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Destroy all road tile bodies and the car; no-op without a road."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and build its road tiles.

        Appends to ``self.road_poly`` (which must already exist, see
        :meth:`reset`) and sets ``self.road``, ``self.start_alpha`` and, on
        success, ``self.track`` (a list of ``(alpha, beta, x, y)`` tuples).

        Returns:
            True when a track was built; False when no usable closed loop
            was found and the caller should try again.
        """
        CHECKPOINTS = 12

        # Create checkpoints: one per angular sector, at a random radius.
        # The first and the last checkpoint are pinned to 1.5 * TRACK_RAD.
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        # self.road_poly = [ (    # uncomment this to see checkpoints
        #     [ (tx,ty) for a,tx,ty in checkpoints ],
        #     (0.7,0.7,0.9) ) ]
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of generated points
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                # Wrapped past the last checkpoint: retry one turn earlier.
                alpha -= 2 * math.pi
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad
            proj = r1x * dest_dx + r1y * dest_dy
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            # Turn towards the destination, limited by TRACK_TURN_RATE.
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored,
        # second is OK. The scan runs backwards from the end of the track.
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns: a tile qualifies when the last
        # BORDER_MIN_COUNT heading changes are all large and of the same sign.
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        # Extend each border backwards over the tiles that led up to it.
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles: one quad between each point and its predecessor
        # (index -1 wraps around, closing the loop).
        for i in range(len(track)):
            _, beta1, x1, y1 = track[i]
            _, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            t = self.world.CreateStaticBody(fixtures=fixtureDef(
                shape=polygonShape(
                    vertices=[road1_l, road1_r, road2_r, road2_l])))
            t.userData = t
            c = 0.01 * (i % 3)  # slight per-tile shade variation
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l],
                                   t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                # Alternate white and red segments.
                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l],
                                       (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a fresh track and return the first frame."""
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Track generation can fail; retry until it succeeds.
        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many of this messages)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by ``1 / FPS`` seconds.

        Args:
            action: ``[steer, gas, brake]`` or None. None applies no
                controls and skips all reward/termination logic (this is
                how :meth:`reset` obtains the first observation).

        Returns:
            ``(state, step_reward, done, info)`` where ``state`` is the
            ``'state_pixels'`` frame and ``info`` is an empty dict.
        """
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Draw the scene and, for pixel modes, read it back as an array.

        Args:
            mode: ``'human'``, ``'rgb_array'`` or ``'state_pixels'``.

        Returns:
            None when :meth:`reset` has not been called yet;
            ``self.viewer.isopen`` in ``'human'`` mode; otherwise a uint8
            array of shape ``(VIDEO_H, VIDEO_W, 3)`` for ``'rgb_array'`` or
            ``(STATE_H, STATE_W, 3)`` for ``'state_pixels'``.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                '0000', font_size=36,
                x=20, y=WINDOW_H * 2.5 / 40.00,
                anchor_x='left', anchor_y='center',
                color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car moves, orient the camera along its velocity instead
        # of the hull angle.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            # Scale the viewport by the backing scale factor when the context
            # exposes one (the `_nscontext` attribute).
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view(
                ).backingScaleFactor()  # pylint: disable=protected-access
            # The scale factor may be a float; glViewport takes integer
            # sizes, so truncate explicitly.
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.fromstring's binary mode (sep='') is deprecated in NumPy;
        # frombuffer is the documented replacement. copy() keeps the old
        # behaviour of returning a fresh array that owns its data.
        arr = np.frombuffer(image_data.data, dtype=np.uint8).copy()
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3]  # flip rows, drop the 4th channel

        return arr

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the playfield background, its checker pattern and the road."""
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD / 20.0
        # Lighter squares on every second cell of a 40x40 grid.
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the bottom status bar (speed, wheels, steering, score).

        Args:
            W: width used to scale the indicator layout.
            H: height used to scale the indicator layout.
        """
        gl.glBegin(gl.GL_QUADS)
        s = W / 40.0
        h = H / 40.0
        # Black strip behind the indicators.
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            # Vertical bar in column `place`, height proportional to `val`.
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            # Horizontal bar starting at `place`, signed length `val`.
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
class CarRacing(gym.Env, EzPickle):
    """Top-down car racing environment that exposes a vehicle state vector.

    ``step()`` and ``reset()`` return an 11-element float vector instead of
    the rendered pixels:

    ======  ====================================================
    index   content
    ======  ====================================================
    0:2     hull position (x, y)
    2       hull angle + pi/2, wrapped to [0, 2*pi)
    3:5     hull linear velocity (x, y)
    5       hull angular velocity
    6       joint angle of wheel 0
    7:11    angular speed (omega) of wheels 0..3
    ======  ====================================================

    NOTE(review): the Box2D world is created without a contact listener in
    this class, so nothing visible here updates ``tile_visited_count`` --
    confirm whether tile bookkeeping is intentionally disabled.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, seed=None, verbose=0):
        """Create the physics world and declare the action/observation spaces.

        Args:
            seed: Seed forwarded to :meth:`seed`; ``None`` picks a random one.
            verbose: When ``1``, track-generation progress is printed.
        """
        EzPickle.__init__(self)
        self.world = Box2D.b2World((0, 0))
        self.id = self.seed(seed=seed)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.labels = []
        self.road = None
        self.car = None
        self.dt = 1.0 / FPS  # simulation time step
        self.action = np.zeros((3, ))  # last applied (steer, gas, brake)
        self.state = np.zeros((11, ))
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.track_width = TRACK_WIDTH
        # Template fixture; its vertices are overwritten for every tile.
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))
        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([1, 1, 1]),
                                       dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=(11, ),
                                            dtype=np.float32)

    def seed(self, seed=None):
        """Seed the environment's random generator.

        Returns:
            The seed actually used, formatted with ``hex()``.
        """
        self.np_random, seed = seeding.np_random(seed)
        return hex(seed)

    def _destroy(self):
        """Remove all track tiles and the car from the physics world."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and build its tile bodies.

        Fills ``self.road``, ``self.road_poly`` and ``self.track``.

        Returns:
            ``True`` on success, ``False`` when no usable closed loop was
            produced (the caller is expected to retry).
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so the loop always ends
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored,
        # second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles; tile i joins track point i with point i - 1
        # (index -1 wraps around, which closes the loop).
        for i in range(len(track)):
            _, beta1, x1, y1 = track[i]
            _, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            t.color = ROAD_COLOR
            t.road_visited = False
            t.road_friction = ROAD_FRICTION
            t.fixtures[0].sensor = True
            self.road_poly.append(
                ([road1_l, road1_r, road2_r, road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                # Alternate white / red segments along the border.
                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l],
                                       (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Build a fresh track and car, and return the initial state vector."""
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Track generation can fail; keep trying until it succeeds.
        while not self._create_track():
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many of this messages)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)

    def step(self, action):
        """Advance the simulation by ``self.dt`` and return the state vector.

        Args:
            action: Sequence ``(steer, gas, brake)`` or ``None``. ``None`` is
                used by :meth:`reset` to advance one frame without applying
                controls or touching the reward.

        Returns:
            A copy of the 11-element state vector (see the class docstring).
            Unlike the usual Gym API, no reward/done/info is returned.
        """
        if action is not None:
            self.action = np.array(action)
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(self.dt)
        self.world.Step(self.dt, 6 * 30, 2 * 30)
        self.t += self.dt

        # The rendered frame itself is not used as the observation here.
        self.render("state_pixels")

        # Update vehicle state
        self.state[0:2] = self.car.hull.position
        self.state[2] = (self.car.hull.angle + np.pi / 2) % (2 * np.pi)
        self.state[3:5] = self.car.hull.linearVelocity
        self.state[5] = self.car.hull.angularVelocity
        self.state[6] = self.car.wheels[0].joint.angle
        self.state[7] = self.car.wheels[0].omega
        self.state[8] = self.car.wheels[1].omega
        self.state[9] = self.car.wheels[2].omega
        self.state[10] = self.car.wheels[3].omega

        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be
            # faster.
            self.car.fuel_spent = 0.0
            self.prev_reward = self.reward

        # Return a copy: self.state is overwritten in place on the next step,
        # so handing out the live buffer would silently rewrite every
        # observation a caller has stored.
        return self.state.copy()

    def render(self, mode='human'):
        """Draw the scene and, for array modes, return the frame.

        Args:
            mode: ``'human'``, ``'state_pixels'`` or ``'rgb_array'``.

        Returns:
            ``None`` if :meth:`reset` has not been called yet;
            ``self.viewer.isopen`` for ``'human'``; otherwise a
            ``(VP_H, VP_W, 3)`` ``uint8`` array read back from the colour
            buffer and flipped vertically.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            # (text, font size, x, y) of every HUD caption.
            label_specs = [
                ('Input', 15, WINDOW_W / 16 * 3, WINDOW_H / 12),
                ('S', 12, WINDOW_W / 64 * 7, WINDOW_H / 100 * 3),
                ('T', 12, WINDOW_W / 64 * 14, WINDOW_H / 100 * 3),
                ('B', 12, WINDOW_W / 64 * 17, WINDOW_H / 100 * 3),
                ('Linear Velocity', 15, WINDOW_W / 2, WINDOW_H / 12),
                ('FL FR', 12, WINDOW_W / 16 * 7, WINDOW_H / 100 * 3),
                ('RL RR', 12, WINDOW_W / 2, WINDOW_H / 100 * 3),
                ('C', 12, WINDOW_W / 64 * 37, WINDOW_H / 100 * 3),
                ('Angular Velocity', 15, WINDOW_W / 16 * 13, WINDOW_H / 12),
                ('C', 12, WINDOW_W / 16 * 13, WINDOW_H / 100 * 3),
            ]
            # Rebuild the list instead of appending, so that re-creating the
            # viewer after close() does not duplicate every caption.
            self.labels = [
                pyglet.text.Label(text,
                                  font_size=font_size,
                                  x=x,
                                  y=y,
                                  anchor_x='center',
                                  anchor_y='center',
                                  color=(255, 255, 255, 255))
                for text, font_size, x, y in label_specs
            ]
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Zoom grows from 1 towards ZOOM * SCALE as self.t increases.
        zoom = np.clip((ZOOM * SCALE - 1) * np.power(self.t, 5) + 1, 1,
                       ZOOM * SCALE)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            # Once the car is moving, orient the view along its velocity.
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view(
                ).backingScaleFactor()  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.fromstring(..., sep='') is the deprecated binary mode; frombuffer
        # is its replacement. The result is a read-only view of the pixel
        # bytes, so copy after slicing to hand out a writable array as before.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        arr = arr.reshape(VP_H, VP_W, 4)
        # Flip vertically and drop the 4th channel.
        return arr[::-1, :, 0:3].copy()

    def close(self):
        """Close the viewer window, if one was created."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the background, its checker pattern and all road polygons."""
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0.75, 0.75, 0.75, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(0.65, 0.65, 0.65, 1.0)
        k = PLAYFIELD / 20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for i, (poly, color) in enumerate(self.road_poly):
            if i == 2:
                # NOTE(review): the polygon at index 2 is always drawn white;
                # presumably a start marker -- confirm the intent.
                gl.glColor4f(1, 1, 1, 1)
            else:
                gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the HUD bar with input, wheel and velocity gauges.

        Args:
            W: Width of the drawing area.
            H: Height of the drawing area.
        """
        gl.glBegin(gl.GL_QUADS)
        # Background strip covering the bottom tenth of the window.
        gl.glColor4f(0, 0, 0, 0.2)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, H / 10, 0)
        gl.glVertex3f(0, H / 10, 0)
        gl.glVertex3f(0, 0, 0)
        w = W / 100
        h = H / 100

        def ver_ind(place, val, color):
            """Vertical bar centred at x=place with height h * val."""
            gl.glColor4f(color[0], color[1], color[2], color[3])
            gl.glVertex3f(place - 1.5 * w, h * val, 0)
            gl.glVertex3f(place + 1.5 * w, h * val, 0)
            gl.glVertex3f(place + 1.5 * w, 0, 0)
            gl.glVertex3f(place - 1.5 * w, 0, 0)

        def hor_ind(place, val, color):
            """Horizontal bar starting at x=place with length w * val."""
            gl.glColor4f(color[0], color[1], color[2], color[3])
            gl.glVertex3f(place, 5 * h, 0)
            gl.glVertex3f(place + w * val, 5 * h, 0)
            gl.glVertex3f(place + w * val, h, 0)
            gl.glVertex3f(place, h, 0)

        true_speed = np.linalg.norm(self.car.hull.linearVelocity)
        hor_ind(W / 64 * 7, 7 * self.action[0], (1, 1, 0, 0.7))
        ver_ind(W / 64 * 14, 6 * self.action[1], (0, 1, 0, 0.7))
        ver_ind(W / 64 * 17, 6 * self.action[2], (1, 0, 0, 0.7))
        ver_ind(W / 16 * 7 - 1.5 * w, 0.025 * self.car.wheels[0].omega,
                (0, 0.7, 1, 0.7))
        ver_ind(W / 16 * 7 + 1.5 * w, 0.025 * self.car.wheels[1].omega,
                (0, 0.7, 1, 0.7))
        ver_ind(W / 2 - 1.5 * w, 0.025 * self.car.wheels[2].omega,
                (0, 0.5, 1, 0.7))
        ver_ind(W / 2 + 1.5 * w, 0.025 * self.car.wheels[3].omega,
                (0, 0.5, 1, 0.8))
        ver_ind(W / 64 * 37, 0.05 * true_speed, (0, 0, 1, 0.7))
        hor_ind(W / 16 * 13, -1 * self.car.hull.angularVelocity,
                (0.5, 0, 1, 0.7))
        gl.glEnd()
        for label in self.labels:
            label.draw()
class CarRacing(gym.Env, EzPickle):
    """Top-down car racing environment observed through rendered pixels.

    Actions are ``(steer, gas, brake)``; observations are
    ``(STATE_H, STATE_W, 3)`` ``uint8`` frames. Every step with an action
    costs 0.1 reward, and leaving the playfield ends the episode with a
    step reward of -100.
    """

    metadata = {
        "render.modes": ["human", "rgb_array", "state_pixels"],
        "video.frames_per_second": FPS,
    }

    def __init__(self, verbose=1):
        """Create the physics world and declare the action/observation spaces.

        Args:
            verbose: When ``1``, track-generation progress is printed.
        """
        EzPickle.__init__(self)
        self.seed()
        # Keep a reference on self so the listener outlives this call.
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        # Template fixture; its vertices are overwritten for every tile.
        self.fd_tile = fixtureDef(
            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
        )

        self.action_space = spaces.Box(
            np.array([-1, 0, 0]).astype(np.float32),
            np.array([+1, +1, +1]).astype(np.float32),
        )  # steer, gas, brake

        self.observation_space = spaces.Box(
            low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
        )

    def seed(self, seed=None):
        """Seed the environment's random generator.

        Returns:
            A one-element list holding the seed actually used.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Remove all track tiles and the car from the physics world."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and build its tile bodies.

        Fills ``self.road``, ``self.road_poly`` and ``self.track``.

        Returns:
            ``True`` on success, ``False`` when no usable closed loop was
            produced (the caller is expected to retry).
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so the loop always ends
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                alpha -= 2 * math.pi

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (
                track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha
            )
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1 : i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2]))
            + np.square(first_perp_y * (track[0][3] - track[-1][3]))
        )
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles; tile i joins track point i with point i - 1
        # (index -1 wraps around, which closes the loop).
        for i in range(len(track)):
            _, beta1, x1, y1 = track[i]
            _, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # Slightly vary the shade over a 3-tile cycle.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                # Alternate white / red segments along the border.
                self.road_poly.append(
                    ([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0))
                )
        self.track = track
        return True

    def reset(self):
        """Build a fresh track and car, and return the first observation."""
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Track generation can fail; keep trying until it succeeds.
        while not self._create_track():
            if self.verbose == 1:
                # The trailing space matters: the two literals are joined
                # without any separator.
                print(
                    "retry to generate track (normal if there are not many "
                    "instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: Sequence ``(steer, gas, brake)`` or ``None``. ``None`` is
                used by :meth:`reset` to advance one frame without applying
                controls or touching the reward.

        Returns:
            Tuple ``(state, step_reward, done, info)`` where ``state`` is the
            ``"state_pixels"`` frame and ``info`` is an empty dict.
        """
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -=  10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode="human"):
        """Draw the scene and, for array modes, return the frame.

        Args:
            mode: ``"human"``, ``"state_pixels"`` or ``"rgb_array"``.

        Returns:
            ``None`` if :meth:`reset` has not been called yet;
            ``self.viewer.isopen`` for ``"human"``; otherwise a
            ``(VP_H, VP_W, 3)`` ``uint8`` array read back from the colour
            buffer and flipped vertically.
        """
        assert mode in ["human", "state_pixels", "rgb_array"]
        if self.viewer is None:
            from gym.envs.classic_control import rendering

            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                "0000",
                font_size=36,
                x=20,
                y=WINDOW_H * 2.5 / 40.00,
                anchor_x="left",
                anchor_y="center",
                color=(255, 255, 255, 255),
            )
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second:
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            # Once the car is moving, orient the view along its velocity.
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2
            - (scroll_x * zoom * math.cos(angle) - scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4
            - (scroll_x * zoom * math.sin(angle) + scroll_y * zoom * math.cos(angle)),
        )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == "rgb_array":
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == "state_pixels":
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, "_nscontext"):
                pixel_scale = (
                    win.context._nscontext.view().backingScaleFactor()
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == "human":
            win.flip()
            return self.viewer.isopen

        image_data = (
            pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
        )
        # np.fromstring(..., sep="") is the deprecated binary mode; frombuffer
        # is its replacement. The result is a read-only view of the pixel
        # bytes, so copy after slicing to hand out a writable array as before.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        arr = arr.reshape(VP_H, VP_W, 4)
        # Flip vertically and drop the 4th channel.
        return arr[::-1, :, 0:3].copy()

    def close(self):
        """Close the viewer window, if one was created."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the grass background, its checker pattern and the road."""
        # One big quad for the whole playfield.
        colors = [0.4, 0.8, 0.4, 1.0] * 4
        polygons_ = [
            +PLAYFIELD,
            +PLAYFIELD,
            0,
            +PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            -PLAYFIELD,
            0,
            -PLAYFIELD,
            +PLAYFIELD,
            0,
        ]

        k = PLAYFIELD / 20.0
        # 20 x 20 lighter squares (range(-20, 20, 2) yields 20 values per
        # axis), four vertices each.
        colors.extend([0.4, 0.9, 0.4, 1.0] * 4 * 20 * 20)
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                polygons_.extend(
                    [
                        k * x + k,
                        k * y + 0,
                        0,
                        k * x + 0,
                        k * y + 0,
                        0,
                        k * x + 0,
                        k * y + k,
                        0,
                        k * x + k,
                        k * y + k,
                        0,
                    ]
                )

        for poly, color in self.road_poly:
            colors.extend([color[0], color[1], color[2], 1] * len(poly))
            for p in poly:
                polygons_.extend([p[0], p[1], 0])

        vl = pyglet.graphics.vertex_list(
            len(polygons_) // 3, ("v3f", polygons_), ("c4f", colors)
        )
        vl.draw(gl.GL_QUADS)
        vl.delete()

    def render_indicators(self, W, H):
        """Draw the HUD strip with gauges and the score label.

        Args:
            W: Width of the drawing area.
            H: Height of the drawing area.
        """
        s = W / 40.0
        h = H / 40.0
        # Black background strip, 5 * h tall.
        colors = [0, 0, 0, 1] * 4
        polygons = [W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0]

        def vertical_ind(place, val, color):
            """Queue a one-slot-wide vertical bar of height h * val."""
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend(
                [
                    place * s,
                    h + h * val,
                    0,
                    (place + 1) * s,
                    h + h * val,
                    0,
                    (place + 1) * s,
                    h,
                    0,
                    (place + 0) * s,
                    h,
                    0,
                ]
            )

        def horiz_ind(place, val, color):
            """Queue a horizontal bar of length val slots starting at place."""
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend(
                [
                    (place + 0) * s,
                    4 * h,
                    0,
                    (place + val) * s,
                    4 * h,
                    0,
                    (place + val) * s,
                    2 * h,
                    0,
                    (place + 0) * s,
                    2 * h,
                    0,
                ]
            )

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0])
            + np.square(self.car.hull.linearVelocity[1])
        )

        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega, (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        vl = pyglet.graphics.vertex_list(
            len(polygons) // 3, ("v3f", polygons), ("c4f", colors)
        )
        vl.draw(gl.GL_QUADS)
        vl.delete()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
class CarRacing(gym.Env, EzPickle): """ ### Description The easiest continuous control task to learn from pixels - a top-down racing environment. Discrete control is reasonable in this environment as well; on/off discretization is fine. The game is solved when the agent consistently gets 900+ points. The generated track is random every episode. Some indicators are shown at the bottom of the window along with the state RGB buffer. From left to right: true speed, four ABS sensors, steering wheel position, and gyroscope. To play yourself (it's rather fast for humans), type: ``` python gym/envs/box2d/car_racing.py ``` Remember: it's a powerful rear-wheel drive car - don't press the accelerator and turn at the same time. ### Action Space There are 3 actions: steering (-1 is full left, +1 is full right), gas, and braking. ### Observation Space State consists of 96x96 pixels. ### Rewards The reward is -0.1 every frame and +1000/N for every track tile visited, where N is the total number of tiles visited in the track. For example, if you have finished in 732 frames, your reward is 1000 - 0.1*732 = 926.8 points. ### Starting State The car starts at rest in the center of the road. ### Episode Termination The episode finishes when all of the tiles are visited. The car can also go outside of the playfield - that is, far off the track, in which case it will receive -100 reward and die. ### Arguments There are no arguments supported in constructing the environment. ### Version History - v0: Current version ### References - Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.
### Credits Created by Oleg Klimov """ metadata = { "render_modes": ["human", "rgb_array", "state_pixels"], "render_fps": FPS, } def __init__(self, verbose=1, lap_complete_percent=0.95): EzPickle.__init__(self) pygame.init() self.contactListener_keepref = FrictionDetector( self, lap_complete_percent) self.world = Box2D.b2World( (0, 0), contactListener=self.contactListener_keepref) self.screen = None self.clock = None self.isopen = True self.invisible_state_window = None self.invisible_video_window = None self.road = None self.car = None self.reward = 0.0 self.prev_reward = 0.0 self.verbose = verbose self.new_lap = False self.fd_tile = fixtureDef(shape=polygonShape( vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])) # This will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric # or normalised however this is not possible here so ignore self.action_space = spaces.Box( np.array([-1, 0, 0]).astype(np.float32), np.array([+1, +1, +1]).astype(np.float32), ) # steer, gas, brake self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8) def _destroy(self): if not self.road: return for t in self.road: self.world.DestroyBody(t) self.road = [] self.car.destroy() def _create_track(self): CHECKPOINTS = 12 # Create checkpoints checkpoints = [] for c in range(CHECKPOINTS): noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS) alpha = 2 * math.pi * c / CHECKPOINTS + noise rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD) if c == 0: alpha = 0 rad = 1.5 * TRACK_RAD if c == CHECKPOINTS - 1: alpha = 2 * math.pi * c / CHECKPOINTS self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS rad = 1.5 * TRACK_RAD checkpoints.append( (alpha, rad * math.cos(alpha), rad * math.sin(alpha))) self.road = [] # Go from one checkpoint to another to create track x, y, beta = 1.5 * TRACK_RAD, 0, 0 dest_i = 0 laps = 0 track = [] no_freeze = 2500 visited_other_side = False while True: alpha = math.atan2(y, x) if 
visited_other_side and alpha > 0: laps += 1 visited_other_side = False if alpha < 0: visited_other_side = True alpha += 2 * math.pi while True: # Find destination from checkpoints failed = True while True: dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)] if alpha <= dest_alpha: failed = False break dest_i += 1 if dest_i % len(checkpoints) == 0: break if not failed: break alpha -= 2 * math.pi continue r1x = math.cos(beta) r1y = math.sin(beta) p1x = -r1y p1y = r1x dest_dx = dest_x - x # vector towards destination dest_dy = dest_y - y # destination vector projected on rad: proj = r1x * dest_dx + r1y * dest_dy while beta - alpha > 1.5 * math.pi: beta -= 2 * math.pi while beta - alpha < -1.5 * math.pi: beta += 2 * math.pi prev_beta = beta proj *= SCALE if proj > 0.3: beta -= min(TRACK_TURN_RATE, abs(0.001 * proj)) if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001 * proj)) x += p1x * TRACK_DETAIL_STEP y += p1y * TRACK_DETAIL_STEP track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y)) if laps > 4: break no_freeze -= 1 if no_freeze == 0: break # Find closed loop range i1..i2, first loop should be ignored, second is OK i1, i2 = -1, -1 i = len(track) while True: i -= 1 if i == 0: return False # Failed pass_through_start = (track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha) if pass_through_start and i2 == -1: i2 = i elif pass_through_start and i1 == -1: i1 = i break if self.verbose == 1: print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1)) assert i1 != -1 assert i2 != -1 track = track[i1:i2 - 1] first_beta = track[0][1] first_perp_x = math.cos(first_beta) first_perp_y = math.sin(first_beta) # Length of perpendicular jump to put together head and tail well_glued_together = np.sqrt( np.square(first_perp_x * (track[0][2] - track[-1][2])) + np.square(first_perp_y * (track[0][3] - track[-1][3]))) if well_glued_together > TRACK_DETAIL_STEP: return False # Red-white border on hard turns border = [False] * len(track) 
for i in range(len(track)): good = True oneside = 0 for neg in range(BORDER_MIN_COUNT): beta1 = track[i - neg - 0][1] beta2 = track[i - neg - 1][1] good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2 oneside += np.sign(beta1 - beta2) good &= abs(oneside) == BORDER_MIN_COUNT border[i] = good for i in range(len(track)): for neg in range(BORDER_MIN_COUNT): border[i - neg] |= border[i] # Create tiles for i in range(len(track)): alpha1, beta1, x1, y1 = track[i] alpha2, beta2, x2, y2 = track[i - 1] road1_l = ( x1 - TRACK_WIDTH * math.cos(beta1), y1 - TRACK_WIDTH * math.sin(beta1), ) road1_r = ( x1 + TRACK_WIDTH * math.cos(beta1), y1 + TRACK_WIDTH * math.sin(beta1), ) road2_l = ( x2 - TRACK_WIDTH * math.cos(beta2), y2 - TRACK_WIDTH * math.sin(beta2), ) road2_r = ( x2 + TRACK_WIDTH * math.cos(beta2), y2 + TRACK_WIDTH * math.sin(beta2), ) vertices = [road1_l, road1_r, road2_r, road2_l] self.fd_tile.shape.vertices = vertices t = self.world.CreateStaticBody(fixtures=self.fd_tile) t.userData = t c = 0.01 * (i % 3) t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c] t.road_visited = False t.road_friction = 1.0 t.idx = i t.fixtures[0].sensor = True self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color)) self.road.append(t) if border[i]: side = np.sign(beta2 - beta1) b1_l = ( x1 + side * TRACK_WIDTH * math.cos(beta1), y1 + side * TRACK_WIDTH * math.sin(beta1), ) b1_r = ( x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1), y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1), ) b2_l = ( x2 + side * TRACK_WIDTH * math.cos(beta2), y2 + side * TRACK_WIDTH * math.sin(beta2), ) b2_r = ( x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2), y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2), ) self.road_poly.append(([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0))) self.track = track return True def reset( self, *, seed: Optional[int] = None, return_info: bool = False, options: Optional[dict] = None, ): super().reset(seed=seed) 
self._destroy() self.reward = 0.0 self.prev_reward = 0.0 self.tile_visited_count = 0 self.t = 0.0 self.new_lap = False self.road_poly = [] while True: success = self._create_track() if success: break if self.verbose == 1: print("retry to generate track (normal if there are not many" "instances of this message)") self.car = Car(self.world, *self.track[0][1:4]) if not return_info: return self.step(None)[0] else: return self.step(None)[0], {} def step(self, action): if action is not None: self.car.steer(-action[0]) self.car.gas(action[1]) self.car.brake(action[2]) self.car.step(1.0 / FPS) self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) self.t += 1.0 / FPS self.state = self.render("state_pixels") step_reward = 0 done = False if action is not None: # First step without action, called from reset() self.reward -= 0.1 # We actually don't want to count fuel spent, we want car to be faster. # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER self.car.fuel_spent = 0.0 step_reward = self.reward - self.prev_reward self.prev_reward = self.reward if self.tile_visited_count == len(self.track) or self.new_lap: done = True x, y = self.car.hull.position if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD: done = True step_reward = -100 return self.state, step_reward, done, {} def render(self, mode="human"): assert mode in ["human", "state_pixels", "rgb_array"] if self.screen is None and mode == "human": pygame.display.init() self.screen = pygame.display.set_mode((WINDOW_W, WINDOW_H)) if self.clock is None: self.clock = pygame.time.Clock() if "t" not in self.__dict__: return # reset() not called yet self.surf = pygame.Surface((WINDOW_W, WINDOW_H)) # computing transformations angle = -self.car.hull.angle # Animating first second zoom. 
zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1) scroll_x = -(self.car.hull.position[0] + PLAYFIELD) * zoom scroll_y = -(self.car.hull.position[1] + PLAYFIELD) * zoom trans = pygame.math.Vector2((scroll_x, scroll_y)).rotate_rad(angle) trans = (WINDOW_W / 2 + trans[0], WINDOW_H / 4 + trans[1]) self.render_road(zoom, trans, angle) self.car.draw(self.surf, zoom, trans, angle, mode != "state_pixels") self.surf = pygame.transform.flip(self.surf, False, True) # showing stats self.render_indicators(WINDOW_W, WINDOW_H) font = pygame.font.Font(pygame.font.get_default_font(), 42) text = font.render("%04i" % self.reward, True, (255, 255, 255), (0, 0, 0)) text_rect = text.get_rect() text_rect.center = (60, WINDOW_H - WINDOW_H * 2.5 / 40.0) self.surf.blit(text, text_rect) if mode == "human": pygame.event.pump() self.clock.tick(self.metadata["render_fps"]) self.screen.fill(0) self.screen.blit(self.surf, (0, 0)) pygame.display.flip() if mode == "rgb_array": return self._create_image_array(self.surf, (VIDEO_W, VIDEO_H)) elif mode == "state_pixels": return self._create_image_array(self.surf, (STATE_W, STATE_H)) else: return self.isopen def render_road(self, zoom, translation, angle): bounds = PLAYFIELD field = [ (2 * bounds, 2 * bounds), (2 * bounds, 0), (0, 0), (0, 2 * bounds), ] trans_field = [] self.draw_colored_polygon(self.surf, field, (102, 204, 102), zoom, translation, angle) k = bounds / (20.0) grass = [] for x in range(0, 40, 2): for y in range(0, 40, 2): grass.append([ (k * x + k, k * y + 0), (k * x + 0, k * y + 0), (k * x + 0, k * y + k), (k * x + k, k * y + k), ]) for poly in grass: self.draw_colored_polygon(self.surf, poly, (102, 230, 102), zoom, translation, angle) for poly, color in self.road_poly: # converting to pixel coordinates poly = [(p[0] + PLAYFIELD, p[1] + PLAYFIELD) for p in poly] color = [int(c * 255) for c in color] self.draw_colored_polygon(self.surf, poly, color, zoom, translation, angle) def render_indicators(self, W, H): s = W / 
40.0 h = H / 40.0 color = (0, 0, 0) polygon = [(W, H), (W, H - 5 * h), (0, H - 5 * h), (0, H)] pygame.draw.polygon(self.surf, color=color, points=polygon) def vertical_ind(place, val): return [ (place * s, H - (h + h * val)), ((place + 1) * s, H - (h + h * val)), ((place + 1) * s, H - h), ((place + 0) * s, H - h), ] def horiz_ind(place, val): return [ ((place + 0) * s, H - 4 * h), ((place + val) * s, H - 4 * h), ((place + val) * s, H - 2 * h), ((place + 0) * s, H - 2 * h), ] true_speed = np.sqrt( np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1])) # simple wrapper to render if the indicator value is above a threshold def render_if_min(value, points, color): if abs(value) > 1e-4: pygame.draw.polygon(self.surf, points=points, color=color) render_if_min(true_speed, vertical_ind(5, 0.02 * true_speed), (255, 255, 255)) # ABS sensors render_if_min( self.car.wheels[0].omega, vertical_ind(7, 0.01 * self.car.wheels[0].omega), (0, 0, 255), ) render_if_min( self.car.wheels[1].omega, vertical_ind(8, 0.01 * self.car.wheels[1].omega), (0, 0, 255), ) render_if_min( self.car.wheels[2].omega, vertical_ind(9, 0.01 * self.car.wheels[2].omega), (51, 0, 255), ) render_if_min( self.car.wheels[3].omega, vertical_ind(10, 0.01 * self.car.wheels[3].omega), (51, 0, 255), ) render_if_min( self.car.wheels[0].joint.angle, horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle), (0, 255, 0), ) render_if_min( self.car.hull.angularVelocity, horiz_ind(30, -0.8 * self.car.hull.angularVelocity), (255, 0, 0), ) def draw_colored_polygon(self, surface, poly, color, zoom, translation, angle): poly = [pygame.math.Vector2(c).rotate_rad(angle) for c in poly] poly = [(c[0] * zoom + translation[0], c[1] * zoom + translation[1]) for c in poly] gfxdraw.aapolygon(self.surf, poly, color) gfxdraw.filled_polygon(self.surf, poly, color) def _create_image_array(self, screen, size): scaled_screen = pygame.transform.smoothscale(screen, size) return 
np.transpose(np.array(pygame.surfarray.pixels3d(scaled_screen)), axes=(1, 0, 2)) def close(self): pygame.quit() if self.screen is not None: pygame.display.quit() self.isopen = False
class CarRacing(gym.Env, EzPickle):
    """Top-down car racing environment with optional obstacles on the track.

    A closed track is generated procedurally on every ``reset()``.  When the
    environment is built with ``obstacles=True``, ``n_obstacles`` small quads
    are placed along the track and the episode ends with a reward of -100 as
    soon as a corner of the car's bounding box gets closer to an obstacle
    than ``collision_threshold``.

    Observations are produced by the ``SI`` helper (``self.SI.generate_image``)
    rather than by the pyglet renderer.
    """

    metadata = {
        "render.modes": ["human", "rgb_array", "state_pixels"],
        "video.frames_per_second": FPS,
    }

    def __init__(self, verbose=1, obstacles=False):
        """Build the physics world and the action/observation spaces.

        Args:
            verbose: when equal to 1, track-generation messages are printed.
            obstacles: when truthy, obstacles are generated on reset and
                collision checking is enabled in ``step``.
        """
        EzPickle.__init__(self)
        self.SI = SI(
            env=self,
            car_shape=(4, 8),
            image_shape=(STATE_W, STATE_H),
            render_distance=40,
            road_width=40 / 6,
            fill=True,
            interpolate=True,
            obstacles=obstacles,
        )
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.steps = 0
        self.n_obstacles = 10
        self.obstacles = obstacles
        # Half-extents used to offset obstacle vertices across the road.
        self.dim_obstacles = (0.5, 0.5)
        # Distance below which a collision is flagged.
        # NOTE(review): the original comment called this "1m"; the value is
        # 0.1 in world units -- confirm the intended scale.
        self.collision_threshold = 0.1
        self.COLLISION = False

        # Template fixture for road tiles.  The placeholder shape is the
        # rectangle (0, 0), (1, 0), (1, -1), (0, -1); its vertices are
        # overwritten for every tile in _create_track().
        self.fd_tile = fixtureDef(
            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1, +1]),
                                       dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

        # 2-D rotation of the point (x, y) by `angle` radians.
        self.R = lambda x, y, angle: [
            x * np.cos(angle) - y * np.sin(angle),
            y * np.cos(angle) + x * np.sin(angle)
        ]

    def _create_obstacles(self):
        """Place ``n_obstacles`` quads at regular intervals along the track.

        Each obstacle spans the segment between ``track[i]`` and
        ``track[i - 1]``.  Its vertices are appended to ``obstacle_poly``
        (flat x, y, z list used for rendering) and stored in
        ``obstacles_pos`` (array of shape ``(n_obstacles, 4, 2)`` used for
        collision checks).

        Returns:
            True when exactly ``n_obstacles`` obstacles were generated,
            False otherwise.
        """
        # After how many track vertices an obstacle must appear.
        interval = (len(self.track) - 1) // self.n_obstacles
        count = 0
        self.obstacles_pos = np.zeros((self.n_obstacles, 4, 2))
        half_l, half_r = self.dim_obstacles

        # A track shorter than n_obstacles gives interval == 0; skip the loop
        # (instead of taking a modulo by zero) and report failure below.
        if interval >= 1:
            for i in range(len(self.track)):
                if count >= self.n_obstacles:
                    break
                if i <= 1 or i % interval != 0:
                    continue
                count += 1
                _, beta1, x1, y1 = self.track[i]
                _, beta2, x2, y2 = self.track[i - 1]
                # Use the environment's seeded generator so that seed()
                # makes obstacle placement reproducible.
                sign = 1 if self.np_random.uniform(0, 1) < 0.5 else -1
                road1_l = (x1 - sign * half_l * math.cos(beta1),
                           y1 - sign * half_l * math.sin(beta1))
                road1_r = (x1 + sign * half_r * math.cos(beta1),
                           y1 + sign * half_r * math.sin(beta1))
                road2_l = (x2 - sign * half_l * math.cos(beta2),
                           y2 - sign * half_l * math.sin(beta2))
                road2_r = (x2 + sign * half_r * math.cos(beta2),
                           y2 + sign * half_r * math.sin(beta2))
                quad = (road1_l, road1_r, road2_r, road2_l)
                for vx, vy in quad:
                    self.obstacle_poly.extend([vx, vy, 0])
                self.obstacles_pos[count - 1, :, 0] = np.array(
                    [v[0] for v in quad])
                self.obstacles_pos[count - 1, :, 1] = np.array(
                    [v[1] for v in quad])

        # Three floats per vertex, four vertices per obstacle.
        if len(self.obstacle_poly) // 3 == 4 * self.n_obstacles:
            return True
        print('There was a problem generating the obstacle course')
        return False

    def _create_track(self):
        """Generate a random closed track and its Box2D road tiles.

        Random checkpoints are laid out around the origin, a path is traced
        from checkpoint to checkpoint, one closed loop is cut out of that
        path, and a static sensor body is created for each track segment.

        Returns:
            True on success (``self.track``, ``self.road`` and
            ``self.road_poly`` are populated); False when no well-closed
            loop could be extracted, in which case the caller should retry.
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
            alpha = 2 * math.pi * c / CHECKPOINTS + noise
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)

            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD

            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0  # incremented each time the path crosses back to alpha > 0
        track = []
        no_freeze = 2500  # hard cap on the number of generated points
        visited_other_side = False  # True once the path has reached alpha < 0
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi

            while True:  # Find destination from checkpoints
                failed = True

                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break

                if not failed:
                    break

                alpha -= 2 * math.pi
                continue

            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad:
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored,
        # second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (
                x1 - TRACK_WIDTH * math.cos(beta1),
                y1 - TRACK_WIDTH * math.sin(beta1),
            )
            road1_r = (
                x1 + TRACK_WIDTH * math.cos(beta1),
                y1 + TRACK_WIDTH * math.sin(beta1),
            )
            road2_l = (
                x2 - TRACK_WIDTH * math.cos(beta2),
                y2 - TRACK_WIDTH * math.sin(beta2),
            )
            road2_r = (
                x2 + TRACK_WIDTH * math.cos(beta2),
                y2 + TRACK_WIDTH * math.sin(beta2),
            )
            vertices = [road1_l, road1_r, road2_r, road2_l]
            # The shared fixture template is re-shaped for every tile before
            # the static body is created from it.
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)  # slight colour variation between tiles
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l],
                                   t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (
                    x1 + side * TRACK_WIDTH * math.cos(beta1),
                    y1 + side * TRACK_WIDTH * math.sin(beta1),
                )
                b1_r = (
                    x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                    y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
                )
                b2_l = (
                    x2 + side * TRACK_WIDTH * math.cos(beta2),
                    y2 + side * TRACK_WIDTH * math.sin(beta2),
                )
                b2_r = (
                    x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                    y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
                )
                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l],
                                       (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True

    def seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Destroy all road tile bodies and the car; no-op without a road."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _discard_track(self):
        """Drop a freshly generated track so that generation can be retried.

        Unlike ``_destroy`` this does not touch ``self.car``, which has not
        been re-created yet while ``reset`` is still generating the track.
        """
        for tile in self.road:
            self.world.DestroyBody(tile)
        self.road = []
        self.road_poly = []
        self.obstacle_poly = []

    def reset(self):
        """Start a new episode and return the first observation.

        Track (and, when enabled, obstacle) generation is retried until it
        succeeds.
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.obstacle_poly = []
        self.steps = 0

        while True:
            if self._create_track():
                if not self.obstacles or self._create_obstacles():
                    break
                # The track was built but the obstacles were not: remove the
                # tiles and polygons before retrying, otherwise they would
                # pile up in the world and in the render lists.
                self._discard_track()
            if self.verbose == 1:
                print("retry to generate track (normal if there are not many "
                      "instances of this message)")
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``(steer, gas, brake)`` or None.  None is used by
                ``reset`` for the initial step and yields a zero reward.

        Returns:
            ``(state, step_reward, done, info)`` with an empty info dict.
        """
        self.steps += 1
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.SI.generate_image()

        step_reward = 0
        done = False
        if self.obstacles:
            self.collision()
        else:
            self.COLLISION = False

        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be
            # faster, so the fuel counter is simply cleared.
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100
            if self.COLLISION:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def render(self, mode="human"):
        """Draw the current frame with pyglet.

        Args:
            mode: "human", "state_pixels" or "rgb_array".

        Returns:
            None when ``reset`` has not been called yet; the viewer's
            ``isopen`` flag in "human" mode; otherwise an RGB uint8 array of
            shape ``(VP_H, VP_W, 3)`` read back from the colour buffer.
        """
        assert mode in ["human", "state_pixels", "rgb_array"]
        if self.viewer is None:
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                "0000",
                font_size=36,
                x=20,
                y=WINDOW_H * 2.5 / 40.00,
                anchor_x="left",
                anchor_y="center",
                color=(255, 255, 255, 255),
            )
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second:
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car is moving, orient the view along its velocity.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)),
        )
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == "rgb_array":
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == "state_pixels":
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, "_nscontext"):
                pixel_scale = (
                    win.context._nscontext.view().backingScaleFactor())  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        if self.obstacles:
            self.render_obstacles()
            self.render_collision()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == "human":
            win.flip()
            return self.viewer.isopen

        image_data = (pyglet.image.get_buffer_manager().get_color_buffer().
                      get_image_data())
        # np.fromstring in binary mode is deprecated; frombuffer is the
        # documented replacement.  The final copy keeps the returned array
        # writable, as it was before.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        arr = arr.reshape(VP_H, VP_W, 4)
        arr = arr[::-1, :, 0:3].copy()

        return arr

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the playfield, the grass pattern and all road polygons."""
        colors = [0.4, 0.8, 0.4, 1.0] * 4
        polygons_ = [
            +PLAYFIELD, +PLAYFIELD, 0,
            +PLAYFIELD, -PLAYFIELD, 0,
            -PLAYFIELD, -PLAYFIELD, 0,
            -PLAYFIELD, +PLAYFIELD, 0,
        ]

        k = PLAYFIELD / 20.0
        # 20 x 20 lighter grass squares, four vertices each.
        colors.extend([0.4, 0.9, 0.4, 1.0] * 4 * 20 * 20)
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                polygons_.extend([
                    k * x + k, k * y + 0, 0,
                    k * x + 0, k * y + 0, 0,
                    k * x + 0, k * y + k, 0,
                    k * x + k, k * y + k, 0,
                ])

        for poly, color in self.road_poly:
            colors.extend([color[0], color[1], color[2], 1] * len(poly))
            for p in poly:
                polygons_.extend([p[0], p[1], 0])

        # Three floats per vertex, hence the integer division by 3.
        vl = pyglet.graphics.vertex_list(
            len(polygons_) // 3, ("v3f", polygons_), ("c4f", colors))
        vl.draw(gl.GL_QUADS)
        vl.delete()  # release the per-frame vertex list

    def render_indicators(self, W, H):
        """Draw the bottom status bar: speed, wheel, steering and gyro gauges.

        Args:
            W: width of the drawing area.
            H: height of the drawing area.
        """
        s = W / 40.0
        h = H / 40.0
        colors = [0, 0, 0, 1] * 4
        polygons = [W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0]

        def vertical_ind(place, val, color):
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend([
                place * s, h + h * val, 0,
                (place + 1) * s, h + h * val, 0,
                (place + 1) * s, h, 0,
                (place + 0) * s, h, 0,
            ])

        def horiz_ind(place, val, color):
            colors.extend([color[0], color[1], color[2], 1] * 4)
            polygons.extend([
                (place + 0) * s, 4 * h, 0,
                (place + val) * s, 4 * h, 0,
                (place + val) * s, 2 * h, 0,
                (place + 0) * s, 2 * h, 0,
            ])

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))

        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        vl = pyglet.graphics.vertex_list(
            len(polygons) // 3, ("v3f", polygons), ("c4f", colors))
        vl.draw(gl.GL_QUADS)
        vl.delete()  # release the per-frame vertex list
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()

    def render_obstacles(self):
        """Draw every obstacle quad stored in ``obstacle_poly``."""
        # One RGB triple per vertex, four vertices per obstacle.
        C = [255, 5, 5, 255, 5, 5, 255, 5, 5, 255, 5, 5] * self.n_obstacles
        # Divide by 3 because each vertex has 3 components x, y, z.
        v2 = pyglet.graphics.vertex_list(
            len(self.obstacle_poly) // 3, ('v3f', self.obstacle_poly),
            ('c3B', C))
        v2.draw(gl.GL_QUADS)
        v2.delete()  # release the per-frame vertex list

    def _hull_corners(self):
        """Return the four world-space corners of the car's bounding box.

        The box spans +/-2 by +/-3 in the hull frame and is rotated by the
        hull angle and translated to the hull position.  Corners are ordered
        (2, 3), (-2, 3), (-2, -3), (2, -3).
        """
        x, y = self.car.hull.position
        angle = self.car.hull.angle
        corners = []
        for dx, dy in ((2, 3), (-2, 3), (-2, -3), (2, -3)):
            rx, ry = self.R(dx, dy, angle)
            corners.append((rx + x, ry + y))
        return corners

    def render_collision(self):
        """Draw a white quad over the car while a collision is flagged."""
        if not self.COLLISION:
            return
        V = []
        for cx, cy in self._hull_corners():
            V.extend([cx, cy, 0])
        C = [255] * 12  # white, one RGB triple per vertex
        v3 = pyglet.graphics.vertex_list(4, ('v3f', V), ('c3B', C))
        v3.draw(gl.GL_QUADS)
        v3.delete()  # release the per-frame vertex list

    def collision(self):
        """Update ``self.COLLISION`` from the car/obstacle distances.

        For each corner of the car's bounding box the distance to every
        obstacle vertex is computed, plus half the distance to the midpoints
        of the two obstacle edges (2, 3) and (0, 1).  A collision is flagged
        when the smallest of these values is below ``collision_threshold``.

        NOTE: the original docstring said only the front corners matter,
        but all four corners are checked.
        """
        corners = np.array(self._hull_corners())  # shape (4, 2)
        vertices = self.obstacles_pos  # shape (n_obstacles, 4, 2)

        # Distance from every car corner to every obstacle vertex.
        vertex_diff = corners[:, None, None, :] - vertices[None]
        vertex_dist = np.sqrt((vertex_diff**2).sum(axis=-1))

        # Midpoints of the two cross-track edges of each obstacle.  The lower
        # midpoint used to be computed from vertices 2 and 3 as well, which
        # duplicated the upper one and left edge (0, 1) unchecked.
        midpoints_upper = (vertices[:, 2] + vertices[:, 3]) / 2
        midpoints_lower = (vertices[:, 0] + vertices[:, 1]) / 2
        midpoints = np.stack([midpoints_upper, midpoints_lower])
        mid_diff = corners[:, None, None, :] - midpoints[None]
        # Midpoint distances are halved, as in the original computation.
        mid_dist = np.sqrt((mid_diff**2).sum(axis=-1)) * 0.5

        smallest_distance = min(vertex_dist.min(), mid_dist.min())
        self.COLLISION = bool(smallest_distance < self.collision_threshold)
class CarRacing(gym.Env, EzPickle):
    """Car environment with a single randomly placed target ("aim") pose.

    Instead of a race track, ``_create_track`` creates one car-sized sensor
    body at ``(aim_x, aim_y)`` rotated by ``aim_th``.  The car always starts
    from ``init_state``.  An episode only terminates when the car leaves the
    playfield.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self):
        """Build the physics world and the action/observation spaces."""
        EzPickle.__init__(self)
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        # Arguments passed to Car(world, *init_state) on every reset.
        self.init_state = (0.0, 0.0, 0.0)
        # Target pose; re-drawn by random_aim() on every reset.
        self.aim_x = 5.0
        self.aim_y = 10.0
        self.aim_th = 0.0

        self.action_space = spaces.Box(np.array([-1, 0, 0]),
                                       np.array([+1, +1, +1]),
                                       dtype=np.float32)  # steer, gas, brake
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

    def seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def random_aim(self):
        """Draw a new target pose: x, y in [0, 10) and angle in [0, 1).

        The environment's own seeded generator is used, so that ``seed()``
        makes the sequence of targets reproducible.
        """
        self.aim_x = self.np_random.uniform(0, 10)
        self.aim_y = self.np_random.uniform(0, 10)
        self.aim_th = self.np_random.uniform(0, 1)

    def _destroy(self):
        """Destroy the target body and the car; no-op without a road."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Create the target sensor body at the current aim pose.

        A ``CAR_W`` x ``CAR_L`` rectangle centred on the origin is rotated by
        ``aim_th`` and translated to ``(aim_x, aim_y)``.

        Returns:
            Always True.
        """
        self.road = []

        car = np.array([
            [-CAR_W / 2, -CAR_L / 2],
            [CAR_W / 2, -CAR_L / 2],
            [CAR_W / 2, CAR_L / 2],
            [-CAR_W / 2, CAR_L / 2],
        ])
        cos_th = np.cos(self.aim_th)
        sin_th = np.sin(self.aim_th)
        rot = np.array([[cos_th, -sin_th], [sin_th, cos_th]])
        car_rot = np.dot(rot, car.T)
        car_rot_tran = car_rot + np.array([[self.aim_x], [self.aim_y]])
        aim = car_rot_tran.T

        t = self.world.CreateStaticBody(fixtures=fixtureDef(
            shape=polygonShape(vertices=aim.tolist())))
        t.userData = t
        t.color = [AIM_COLOR[0], AIM_COLOR[1], AIM_COLOR[2]]
        t.road_visited = False
        t.road_friction = 1.0
        t.fixtures[0].sensor = True
        self.road_poly.append((aim.tolist(), t.color))
        self.road.append(t)
        return True

    def reset(self):
        """Start a new episode with a fresh target; return the first frame."""
        self._destroy()
        self.random_aim()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False

        while True:
            success = self._create_track()
            if success:
                break
            print(
                "retry to generate track (normal if there are not many of this messages)"
            )
        self.car = Car(self.world, *self.init_state)

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: ``(steer, gas, brake)`` or None.  None is used by
                ``reset`` for the initial step and yields a zero reward.

        Returns:
            ``(state, step_reward, done, info)`` with an empty info dict.
        """
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be
            # faster, so the fuel counter is simply cleared.
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    def _draw_scene(self, viewport_w, viewport_h):
        """Clear the window and draw road, one-time geoms and indicators."""
        self.viewer.window.clear()
        t = self.transform
        gl.glViewport(0, 0, viewport_w, viewport_h)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

    def render(self, mode='human'):
        """Draw the current frame with pyglet.

        Args:
            mode: 'human', 'state_pixels' or 'rgb_array'.

        Returns:
            None in 'human' mode or before ``reset`` was called; otherwise an
            RGB uint8 array of shape ``(VP_H, VP_W, 3)`` read back from the
            colour buffer.
        """
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom first second
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car is moving, orient the view along its velocity.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        if mode != 'state_pixels':
            win.switch_to()
            win.dispatch_events()
        if mode in ("rgb_array", "state_pixels"):
            if mode == 'rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = STATE_W
                VP_H = STATE_H
            self._draw_scene(VP_W, VP_H)
            image_data = pyglet.image.get_buffer_manager().get_color_buffer(
            ).get_image_data()
            # np.fromstring in binary mode is deprecated; frombuffer is the
            # documented replacement.  The final copy keeps the returned
            # array writable, as it was before.
            arr = np.frombuffer(image_data.data, dtype=np.uint8)
            arr = arr.reshape(VP_H, VP_W, 4)
            arr = arr[::-1, :, 0:3].copy()

        # agent can call or not call env.render() itself when recording video.
        if mode == "rgb_array" and not self.human_render:
            win.flip()

        if mode == 'human':
            self.human_render = True
            self._draw_scene(WINDOW_W, WINDOW_H)
            win.flip()

        self.viewer.onetime_geoms = []
        return arr

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the playfield background and every polygon in ``road_poly``."""
        gl.glBegin(gl.GL_QUADS)
        # The whole playfield is filled with ROAD_COLOR.
        gl.glColor4f(*ROAD_COLOR, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the bottom status bar: speed, wheel, steering and gyro gauges.

        Args:
            W: width of the drawing area.
            H: height of the drawing area.
        """
        gl.glBegin(gl.GL_QUADS)
        s = W / 40.0
        h = H / 40.0
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
class CarRacingSoft(gym.Env, EzPickle):
    """CarRacing variant that rasterises its observation in software.

    Instead of rendering through OpenGL, every frame is drawn into
    ``self.state`` (a ``STATE_H x STATE_W x 3`` float32 array) with a scanline
    polygon filler. ``frame_skip`` physics steps are simulated per call to
    :meth:`step`.
    """

    metadata = {'render.modes': ['human'], 'video.frames_per_second': FPS}

    # Palette used by the software renderer (RGB components in [0, 1]).
    color_black = np.array([0., 0., 0.])
    color_white = np.array([1., 1., 1.])
    color_red = np.array([1., 0., 0.])
    color_green = np.array([0., 1., 0.])
    color_grass_dark = np.array([0.4, 0.8, 0.4])
    color_grass_light = np.array([0.4, 0.9, 0.4])
    color_abs_light = np.array([0., 0., 1.])
    color_abs_dark = np.array([0.2, 0., 1.])

    def __init__(self, frame_skip, verbose=False):
        """Create the environment.

        Args:
            frame_skip: number of physics steps simulated per ``step()`` call;
                must be at least 1.
            verbose: when truthy, print track-generation diagnostics and the
                per-step indicator telemetry.

        Raises:
            ValueError: if ``frame_skip`` is smaller than 1.
        """
        EzPickle.__init__(self)
        if frame_skip < 1:
            raise ValueError("The value of frame_skip must be at least 1")
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.action_space = spaces.Box(
            np.array([-1, 0, 0], dtype=np.float32),
            np.array([+1, +1, +1], dtype=np.float32),
            dtype=np.float32)  # steer, gas, brake
        # NOTE(review): the class palette is in [0, 1] and render() multiplies
        # the state by 255 before display, so the declared high=255 bound looks
        # inconsistent with the actual observations. Left unchanged because
        # callers may depend on the declared space - confirm before changing.
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.float32)
        self.state = np.zeros([STATE_H, STATE_W, 3], dtype=np.float32)
        self.frame_skip = frame_skip

    def seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Remove all road tiles and the car from the physics world."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track.

        Populates ``self.road``, ``self.road_poly`` and ``self.track``.

        Returns:
            True on success, False when no well-closed loop was produced (the
            caller is expected to retry).
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so generation cannot hang
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l],
                                   t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l],
                                       (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Returns:
            The first observation (a copy of ``self.state``).
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False
        self.frames = 0

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose:
                print(
                    "retry to generate track (normal if there are not many instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by up to ``frame_skip`` physics steps.

        Args:
            action: ``[steer, gas, brake]`` or ``None`` (used by ``reset()``
                for the initial frame; only one physics step is taken then).

        Returns:
            ``(observation, total_reward, done, info)`` where the observation
            is a copy of ``self.state`` and ``info`` is an empty dict.
        """
        total_reward = 0
        for _ in range(self.frame_skip):
            if action is not None:
                self.car.steer(-action[0])
                self.car.gas(action[1])
                self.car.brake(action[2])

            self.car.step(1.0 / FPS)
            self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
            self.t += 1.0 / FPS

            step_reward = 0
            done = False
            if action is not None:  # First step without action, called from reset()
                self.reward -= 0.1
                # We actually don't want to count fuel spent, we want car to be faster.
                # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
                self.car.fuel_spent = 0.0
                step_reward = self.reward - self.prev_reward
                self.prev_reward = self.reward
                if self.tile_visited_count == len(self.track):
                    done = True
                x, y = self.car.hull.position
                if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                    done = True
                    step_reward = -100

            total_reward += step_reward

            self.frames += 1
            if self.frames > 1000:
                done = True

            if done or action is None:
                break

        self._draw()

        # The telemetry dump is debug output; honour the verbose flag like the
        # other diagnostics instead of printing on every single step.
        if self.verbose:
            self._print_telemetry()

        return np.copy(self.state), total_reward, done, {}

    def _print_telemetry(self):
        """Print indicator values read back from the rendered state pixels.

        The hard-coded row/column indices address the indicator bar at the
        bottom of ``self.state``.
        """
        # NOTE(review): indices presumably assume a 96x96 state - confirm.
        speed = sum(self.state[85:, 2, 0])
        abs1 = sum(self.state[85:, 9, 2])
        abs2 = sum(self.state[85:, 14, 2])
        abs3 = sum(self.state[85:, 19, 2])
        abs4 = sum(self.state[85:, 24, 2])
        steering_input_left = sum(self.state[90, 37:48, 1])
        steering_input_right = sum(self.state[90, 47:58, 1])
        steering = steering_input_right - steering_input_left
        rotation_left = sum(self.state[90, 59:72, 0])
        rotation_right = sum(self.state[90, 72:85, 0])
        rotation = rotation_right - rotation_left
        print(
            f"speed:{speed}\tabs:\t{abs1}\t{abs2}\t{abs3}\t{abs4}\tsteering:{steering}\trotation:{rotation}"
        )

    def render(self, mode='human', close=False):
        """Show the current state, upscaled, in a ``SimpleImageViewer``.

        Args:
            mode: accepted for API compatibility; not inspected.
            close: when true, close the viewer (if any) and return.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.SimpleImageViewer()

        upscaled = self.state.repeat(RENDER_UPSCALE,
                                     axis=0).repeat(RENDER_UPSCALE, axis=1)
        self.viewer.imshow((upscaled * 255).astype(np.uint8))

    def _draw(self):
        """Rasterise road, car and indicators into ``self.state``."""
        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Simple 2D affine transformation class
        class Transform():
            def __init__(self, *values):
                # Row-major 3x3 matrix; identity when no values are given.
                self.matrix = values if len(values) else [
                    1., 0., 0., 0., 1., 0., 0., 0., 1.
                ]

            @staticmethod
            def translation(x, y):
                return Transform(1.0, 0.0, x, 0.0, 1.0, y, 0.0, 0.0, 1.0)

            @staticmethod
            def scale(x, y):
                return Transform(x, 0.0, 0.0, 0.0, y, 0.0, 0.0, 0.0, 1.0)

            @staticmethod
            def rotation(angle):
                cos, sin = math.cos(angle), math.sin(angle)
                return Transform(cos, -sin, 0.0, sin, cos, 0.0, 0.0, 0.0, 1.0)

            def apply_and_swap(self, point):
                # Returns (y', x') so the result can index a (row, col) canvas.
                sa, sb, sc, sd, se, sf, _, _, _ = self.matrix
                x, y = point
                return (x * sd + y * se + sf, x * sa + y * sb + sc)

            def __mul__(self, other):
                sa, sb, sc, sd, se, sf, _, _, _ = self.matrix
                oa, ob, oc, od, oe, of, _, _, _ = other.matrix
                return Transform(sa * oa + sb * od, sa * ob + sb * oe,
                                 sa * oc + sb * of + sc, sd * oa + se * od,
                                 sd * ob + se * oe, sd * oc + se * of + sf,
                                 0.0, 0.0, 1.0)

            def __imul__(self, other):
                return self.__mul__(other)

        class Renderer():
            """Adapter handed to ``Car.draw``; forwards polygons to the canvas."""

            def __init__(self, env):
                self.env = env

            def draw_polygon(self, path, color):
                self.env._fill_polygon(path, self.env.state, color)

        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)  # Animate zoom first second
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])

        self.transform = Transform.translation(STATE_W / 2, STATE_H * 3 / 4)
        self.transform *= Transform.scale(STATE_W / 1000, STATE_H / 1000)
        self.transform *= Transform.scale(zoom, -zoom)
        self.transform *= Transform.rotation(angle)
        self.transform *= Transform.translation(-scroll_x, -scroll_y)

        # Clear
        self.state[:, :, :] = self.color_black

        # Draw road, car and indicators
        self._render_road(scroll_x, scroll_y, zoom)
        self.car.draw(Renderer(self), False)
        self._render_indicators()

    def _render_road(self, scroll_x, scroll_y, zoom):
        """Draw grass background, grass patches and road/border polygons.

        Patches and road polygons farther than ``mindist`` (squared distance)
        from the scroll position are skipped.
        """
        self._fill_polygon([(-PLAYFIELD, +PLAYFIELD), (+PLAYFIELD, +PLAYFIELD),
                            (+PLAYFIELD, -PLAYFIELD), (-PLAYFIELD, -PLAYFIELD)],
                           self.state, self.color_grass_dark)
        k = PLAYFIELD / 20.0
        mindist = 2000000 / (zoom**2)
        for x in range(-20, 20, 2):
            kx = k * x
            dist = (kx - scroll_x)**2
            if dist >= mindist:
                continue
            for y in range(-20, 20, 2):
                ky = k * y
                if dist + (ky - scroll_y)**2 >= mindist:
                    continue
                self._fill_polygon([(kx + k, ky + 0), (kx + 0, ky + 0),
                                    (kx + 0, ky + k), (kx + k, ky + k)],
                                   self.state, self.color_grass_light)
        for poly, color in self.road_poly:
            if (poly[0][0] - scroll_x)**2 + (poly[0][1] -
                                             scroll_y)**2 >= mindist:
                continue
            self._fill_polygon(poly, self.state, color)

    def _render_indicators(self):
        """Draw the black indicator bar and the gauges at the bottom."""
        s = STATE_W / 40
        h = STATE_H / 40
        self._fill_polygon([(0, STATE_H), (STATE_W, STATE_H),
                            (STATE_W, STATE_H - 5 * h), (0, STATE_H - 5 * h)],
                           self.state,
                           self.color_black,
                           transform=False)

        def vertical_ind(place, val, color):
            self._fill_polygon([((place + 0) * s, STATE_H - h - h * val),
                                ((place + 2) * s, STATE_H - h - h * val),
                                ((place + 2) * s, STATE_H - h),
                                ((place + 0) * s, STATE_H - h)],
                               self.state,
                               color,
                               transform=False)

        def horiz_ind(place, val, color):
            self._fill_polygon([((place + 0) * s, STATE_H - 4 * h),
                                ((place + val) * s, STATE_H - 4 * h),
                                ((place + val) * s, STATE_H - 1.5 * h),
                                ((place + 0) * s, STATE_H - 1.5 * h)],
                               self.state,
                               color,
                               transform=False)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(1, 0.02 * true_speed, self.color_white)
        vertical_ind(4, 0.01 * self.car.wheels[0].omega,
                     self.color_abs_light)  # ABS sensors
        vertical_ind(6, 0.01 * self.car.wheels[1].omega, self.color_abs_light)
        vertical_ind(8, 0.01 * self.car.wheels[2].omega, self.color_abs_dark)
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, self.color_abs_dark)
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, self.color_green)
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, self.color_red)

    # Adapted from https://github.com/luispedro/mahotas/blob/master/mahotas/polygon.py
    def _fill_polygon(self, polygon, canvas, color, transform=True):
        '''
        _fill_polygon([(x0,y0), (x1,y1),...], canvas, color)

        Draw a filled polygon in canvas using a scanline fill.

        Parameters
        ----------
        polygon : list of pairs
            a list of (x,y) points. With ``transform=True`` each point is
            mapped through ``self.transform``; otherwise the points are used
            as given. Either way they are converted to (y,x) for indexing.
        canvas : ndarray
            where to draw, will be modified in place
        color : array-like
            value assigned to every filled pixel
        transform : bool, optional
            whether to apply ``self.transform`` to the points (default: True)
        '''
        # algorithm adapted from: http://www.alienryderflex.com/polygon_fill/
        if not len(polygon):
            return

        if transform:
            polygon = [
                self.transform.apply_and_swap(point) for point in polygon
            ]
        else:
            polygon = [(float(y), float(x)) for x, y in polygon]

        # Clip the scanline range to the canvas and bail out early when the
        # polygon lies completely outside of it.
        min_y = max(int(min(y for y, x in polygon)), 0)
        if min_y >= canvas.shape[0]:
            return
        max_y = min(max(int(max(y + 1 for y, x in polygon)), 0),
                    canvas.shape[0])
        if max_y <= 0:
            return
        if min(x for y, x in polygon) >= canvas.shape[1]:
            return
        if max(x for y, x in polygon) < 0:
            return

        for y in range(min_y, max_y):
            # x-coordinates where the scanline crosses a polygon edge
            nodes = []
            j = -1
            for i, p in enumerate(polygon):
                pj = polygon[j]
                if p[0] < y and pj[0] >= y or pj[0] < y and p[0] >= y:
                    dy = pj[0] - p[0]
                    if dy:
                        nodes.append(p[1] + (y - p[0]) / dy * (pj[1] - p[1]))
                    elif p[0] == y:
                        nodes.append(p[1])
                j = i
            nodes.sort()
            # Fill between successive pairs of crossings.
            for n, nn in zip(nodes[::2], nodes[1::2]):
                canvas[y,
                       max(int(n), 0):min(max(int(nn), 0), canvas.shape[1]
                                          )] = color
class CarRacing(gym.Env, EzPickle):
    """CarRacing variant whose pixel observation is an "event frame".

    For the ``state_pixels`` and ``rgb_array`` modes, :meth:`render` returns
    the absolute grayscale difference between the current frame and the
    previously stored one rather than the raw RGB image.

    The previous frame, its RGB decoding and the last event frame are kept in
    the module-level globals ``previous_road``, ``rgb`` and ``event_frame``.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, verbose=1):
        """Create the environment.

        Args:
            verbose: when equal to 1, print track-generation diagnostics.
        """
        EzPickle.__init__(self)
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        # Bounds are given as float32 arrays so they match the declared dtype.
        self.action_space = spaces.Box(
            np.array([-1, 0, 0], dtype=np.float32),
            np.array([+1, +1, +1], dtype=np.float32),
            dtype=np.float32)  # steer, gas, brake
        # NOTE(review): render() returns a single-channel event frame for
        # 'state_pixels', which does not match the 3-channel shape declared
        # here. Left unchanged because callers may depend on it - confirm.
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(STATE_H, STATE_W, 3),
                                            dtype=np.uint8)

    def seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Remove all road tiles and the car from the physics world."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track.

        Populates ``self.road``, ``self.road_poly`` and ``self.track``.

        Returns:
            True on success, False when no well-closed loop was produced (the
            caller is expected to retry).
        """
        CHECKPOINTS = 12

        # Create checkpoints
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        # print "\n".join(str(h) for h in checkpoints)
        # self.road_poly = [ (    # uncomment this to see checkpoints
        #    [ (tx,ty) for a,tx,ty in checkpoints ],
        #    (0.7,0.7,0.9) ) ]
        self.road = []

        # NOTE(review): the following paragraph was embedded in the middle of
        # the "create track" comment below. It does not describe this code and
        # is preserved here only so that no content is lost; it can most
        # likely be removed.
        #
        #   Communicating the goal of a task to another person is easy: we can
        #   use language, show them an image of the desired outcome, point
        #   them to a how-to video, or use some combination of all of these.
        #   On the other hand, specifying a task to a robot for reinforcement
        #   learning requires substantial effort. Most prior work that has
        #   applied deep reinforcement learning to real robots makes use of
        #   specialized sensors to obtain rewards or studies tasks where the
        #   robot's internal sensors can be used to measure reward. For
        #   example, using thermal cameras for tracking fluids, or
        #   purpose-built computer vision systems for tracking objects. Since
        #   such instrumentation needs to be done for any new task that we may
        #   wish to learn, it poses a significant bottleneck to widespread
        #   adoption of reinforcement learning for robotics, and precludes the
        #   use of these methods directly in open-world environments that lack
        #   this instrumentation.

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on iterations so generation cannot hang
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break
        # print "\n".join([str(t) for t in enumerate(track)])

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l],
                                   t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l],
                                       (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Returns:
            The first observation, as produced by ``step(None)``.
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many of this messages)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        return self.step(None)[0]

    def step(self, action):
        """Advance the simulation by one physics step.

        Args:
            action: ``[steer, gas, brake]`` or ``None`` (used by ``reset()``
                for the initial frame).

        Returns:
            ``(state, step_reward, done, info)`` where ``state`` is the result
            of ``render("state_pixels")`` and ``info`` is an empty dict.
        """
        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self.state = self.render("state_pixels")

        step_reward = 0
        done = False
        if action is not None:  # First step without action, called from reset()
            self.reward -= 0.1
            # We actually don't want to count fuel spent, we want car to be faster.
            # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            if self.tile_visited_count == len(self.track):
                done = True
            x, y = self.car.hull.position
            if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                done = True
                step_reward = -100

        return self.state, step_reward, done, {}

    @staticmethod
    def _decode_frame(image_data, height, width):
        """Convert pyglet image data into a top-down ``height x width x 3`` array.

        ``np.frombuffer`` replaces the deprecated binary mode of
        ``np.fromstring``. The result is copied into a contiguous array so it
        stays writable, as it was with ``np.fromstring``.
        """
        flat = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        rgba = flat.reshape(height, width, 4)
        # Flip vertically and drop the alpha channel.
        return np.ascontiguousarray(rgba[::-1, :, 0:3])

    def render(self, mode='human'):
        """Render the scene.

        Args:
            mode: one of ``'human'``, ``'state_pixels'`` or ``'rgb_array'``.

        Returns:
            * ``None`` when ``reset()`` has not been called yet;
            * ``self.viewer.isopen`` for ``'human'``;
            * otherwise the grayscale event frame, i.e. the absolute
              difference between the current frame and the stored previous
              frame.
        """
        global previous_road
        global rgb
        global event_frame

        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)  # Animate zoom first second
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()

        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        self.render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        arr = self._decode_frame(image_data, VP_H, VP_W)
        arr = cv2.cvtColor(arr, cv2.COLOR_RGB2GRAY)

        # Reference frame: the stored previous frame, or the current one when
        # nothing has been stored yet (the event frame is then all zeros).
        had_previous = len(previous_road) >= 1
        previous_road.append(image_data)
        rgb = self._decode_frame(previous_road[0], VP_H, VP_W)
        arrs = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
        if had_previous:
            # Keep only the newest frame for the next call.
            previous_road = previous_road[1:]

        event_frame = cv2.absdiff(arr, arrs)
        # NOTE(review): this writes a PNG into the working directory on every
        # non-human render; looks like leftover debugging - confirm whether
        # anything consumes 'image_name.png' before removing.
        imageio.imwrite('image_name.png', event_frame)
        return event_frame

    def returnRgb(self):
        """Return the ``(rgb, event_frame)`` globals set by the last render.

        Only valid after :meth:`render` has produced at least one frame in
        ``'state_pixels'`` or ``'rgb_array'`` mode.
        """
        return rgb, event_frame

    def close(self):
        """Close the viewer window, if one was created."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the grass background, grass patches and road polygons."""
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD / 20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the bottom indicator bar, the gauges and the score label.

        Args:
            W: width used to scale the indicator layout.
            H: height used to scale the indicator layout.
        """
        gl.glBegin(gl.GL_QUADS)
        s = W / 40.0
        h = H / 40.0
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()
class CarRacing3(gym.Env, EzPickle):
    """Configurable top-down car racing environment with optional obstacles.

    The track is generated procedurally from random checkpoints, reused for a
    configurable number of episodes with different starting tiles, and
    rendered through pyglet/OpenGL. Observations are frames (optionally
    grayscale and stacked); the reward is a weighted sum whose coefficients
    come from ``f_reward`` (see ``step``).
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'state_pixels frame size': [STATE_H, STATE_W],
        'render frame size': [WINDOW_H, WINDOW_W],
        'FPS, 1/timebase': FPS,
        'Zoom_level': ZOOM,
        'Flight start': ZOOM_START,
        'show track on 1st frame': TRACK_FIRST,
    }

    def __init__(self, seed=None, **kwargs):
        """Create the physics world and apply the configuration.

        Args:
            seed: seed forwarded to ``_seed``.
            **kwargs: configuration options, see ``_set_config``.
        """
        EzPickle.__init__(self)
        self._seed(seed)
        # Keep a reference to the listener on the instance; it is handed to
        # the Box2D world below.
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.road = None
        self.car = None
        self.newtile = False
        self.ep_return = 0.0
        # Infinite so that the very first reset() counts as a "real" reset.
        self.action_taken = +np.inf
        # Reusable fixture definition; its vertices are overwritten per tile.
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))
        # Config
        self._set_config(**kwargs)

    def _set_config(
            self,
            game_color=1,
            indicators=True,
            frames_per_state=4,
            skip_frames=3,
            discre=ACT,
            use_track=3,
            episodes_per_track=5,
            tr_complexity=12,
            tr_width=45,
            patience=2.0,
            off_track=1.0,
            f_reward=CONT_REWARD,
            num_obstacles=5,
            end_on_contact=False,
            obst_location=0,
            oily_patch=False,
            verbose=2):
        """Validate the options and build the action/observation spaces.

        Args:
            game_color: state (frame) color option: 0 = RGB, 1 = grayscale,
                2 = green channel only.
            indicators: show or hide the bottom info panel.
            frames_per_state: stacked (rolling history) frames per state
                [1-inf]; the latest observation is always the first frame.
            skip_frames: consecutive frames skipped between state saves [0-4].
            discre: action discretization table, format
                ``[[steer0, throttle0, brake0], [steer1, ...], ...]``;
                ``None`` for continuous actions.
            use_track: number of times the same track is used [1-100]. More
                than 20 carries a high risk of overfitting.
            episodes_per_track: number of evenly distributed starting points
                on each track [1-20]; every reset() starts at the next one.
            tr_complexity: geometric complexity of the generated track [6-20].
            tr_width: relative track width [30-50].
            patience: max time in seconds without progress [0.5-20].
            off_track: max time in seconds driving on grass [0.0-5].
            f_reward: reward function coefficients (indexed in ``step``).
            num_obstacles: obstacle objects placed on the track [0-10].
            end_on_contact: stop the episode on contact with an obstacle.
            obst_location: array presetting obstacle locations in % of the
                track; a negative value means the track's left-hand side.
                0 for random locations.
            oily_patch: use all obstacles as low-friction road (oily patch).
            verbose: verbosity level.
        """
        self.verbose = verbose

        # Obstacle parameters
        self.num_obstacles = np.clip(num_obstacles, 0, 10)
        self.end_on_contact = end_on_contact
        self.oily_patch = oily_patch
        # Scalar 0 means "random placement". Checking the dimensionality
        # first avoids the ambiguous truth value of `ndarray != 0`.
        preset = np.ndim(obst_location) > 0 or obst_location != 0
        if preset and np.size(obst_location) < num_obstacles:
            print("#####################################")
            print("Warning: incomplete obstacle location")
            print("Defaulting to random placement")
            self.obst_location = 0
        elif preset:
            self.obst_location = np.atleast_1d(obst_location)
        else:
            self.obst_location = np.array(obst_location)

        # Reward coefficients verification
        if len(f_reward) < len(CONT_REWARD):
            print("####################################")
            print("Warning: incomplete reward function")
            print("Defaulting to predefined function!!!")
            self.f_reward = CONT_REWARD
        else:
            self.f_reward = f_reward

        # Times to use the same track, up to 100 times.
        self.repeat_track = np.clip(use_track, 1, 100)
        # Infinite so that the first reset() generates a track.
        self.track_use = +np.inf

        # Number of episodes on the same track, with evenly distributed
        # starting points, not more than 20 episodes.
        self.episodes_per_track = np.clip(episodes_per_track, 1, 20)

        # Track generation complexity
        self.complexity = np.clip(tr_complexity, 6, 20)

        # Track width
        self.tr_width = np.clip(tr_width, 30, 50) / SCALE

        # Max time without progress
        self.patience = np.clip(patience, 0.5, 20)

        # Max time off-track
        self.off_track = np.clip(off_track, 0, 5)

        # Show or not bottom info panel
        self.indicators = indicators

        # Grayscale and acceptable frames
        self.grayscale = game_color
        if not self.grayscale:
            if frames_per_state > 1:
                print("####################################")
                print("Warning: making frames_per_state = 1")
                print("No support for several frames in RGB")
                frames_per_state = 1
                skip_frames = 0

        # Frames to be skipped from state (max 4)
        self.skip_frames = np.clip(skip_frames + 1, 1, 5)

        # Frames per state
        self.frames_per_state = frames_per_state if frames_per_state > 0 else 1
        if self.frames_per_state > 1:
            lst = list(
                range(0, self.frames_per_state * self.skip_frames,
                      self.skip_frames))
            # Index of the newest frame first, then the older sampled ones.
            self._update_index = [lst[-1]] + lst[:-1]

        # Gym spaces, observation and action
        self.discre = discre
        # `is None` instead of `== None`: an ndarray table compared with
        # `==` yields an array whose truth value is ambiguous.
        if discre is None:
            self.action_space = spaces.Box(
                np.array([-0.4, 0, 0]), np.array([+0.4, +1, +1]),
                dtype=np.float32)  # steer, gas, brake
        else:
            self.action_space = spaces.Discrete(len(discre))

        if game_color:
            self.observation_space = spaces.Box(
                low=0, high=255,
                shape=(STATE_H, STATE_W, self.frames_per_state),
                dtype=np.uint8)
        else:
            self.observation_space = spaces.Box(
                low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)

    def _update_state(self, new_frame):
        """Push ``new_frame`` into the rolling history and refresh the state."""
        if self.frames_per_state > 1:
            self.int_state[:, :, -1] = new_frame
            self.state = self.int_state[:, :, self._update_index]
            self.int_state = np.roll(self.int_state, 1, 2)
        else:
            self.state = new_frame

    def _seed(self, seed=None):
        """Seed the environment RNG and return the seed in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _create_track(self):
        """Generate a random closed track, its borders and obstacle tiles.

        Returns:
            True when a valid closed loop was produced (and the tiles were
            created), False when generation failed and should be retried.
        """
        # Create checkpoints
        CHECKPOINTS = self.complexity
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.0 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.0 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        # Go from one checkpoint to another to create track
        x, y, beta = 1.0 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        waypoint = []
        no_freeze = 2500
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[
                        dest_i % len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            # destination vector projected on rad
            proj = r1x * dest_dx + r1y * dest_dy
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            waypoint.append([x, y])
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored,
        # second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = (track[i][0] > self.start_alpha
                                  and track[i - 1][0] <= self.start_alpha)
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose > 0:
            print("Track generation: %i..%i -> %i-tiles track, complex %i" %
                  (i1, i2, i2 - i1, self.complexity))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]
        waypoint = waypoint[i1:i2 - 1]
        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False  # Failed

        # Red-white border on hard turns, pure colors
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Pick the obstacle tiles. The sign of an id encodes the side of the
        # track (negative = left-hand side).
        if not np.any(self.obst_location):
            # Random tiles without replacement. Drawn from the environment's
            # own RNG so that the seed given to the env is honoured; a test
            # on "any non-zero" (rather than on the sum) keeps presets such
            # as [-20, 20] from being mistaken for "random".
            obstacle_tiles_ids = self.np_random.choice(
                np.arange(10, len(track) - 6), self.num_obstacles,
                replace=False)
            sides = self.np_random.choice([-1, 1], self.num_obstacles)
            obstacle_tiles_ids = obstacle_tiles_ids * sides
        else:
            obstacle_tiles_ids = np.rint(self.obst_location * len(track) /
                                         100).astype(int)
            obstacle_tiles_ids = obstacle_tiles_ids[0:self.num_obstacles]
        if self.verbose >= 2:
            print(self.num_obstacles, ' obstacles on tiles: ',
                  obstacle_tiles_ids[np.argsort(np.abs(obstacle_tiles_ids))])

        # Store values and call tile generation
        self.border = border
        self.track = track
        self.waypoints = np.asarray(waypoint)
        self.obstacle_tiles_ids = obstacle_tiles_ids
        self._create_tiles(track, border)
        return True

    def _give_track(self):
        """Return the track, its waypoints and the obstacle polygons."""
        return self.track, self.waypoints, self.obstacles_poly

    def _create_tiles(self, track, border):
        """Rebuild the road tile bodies and render polygons for ``track``."""
        # First clear everything
        if self.road is not None:
            for t in self.road:
                self.world.DestroyBody(t)
        self.road = []
        self.road_poly = []
        # Always defined, so _give_track() also works without obstacles.
        self.obstacles_poly = []

        # Create track tiles
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - self.tr_width * math.cos(beta1),
                       y1 - self.tr_width * math.sin(beta1))
            road1_r = (x1 + self.tr_width * math.cos(beta1),
                       y1 + self.tr_width * math.sin(beta1))
            road2_l = (x2 - self.tr_width * math.cos(beta2),
                       y2 - self.tr_width * math.sin(beta2))
            road2_r = (x2 + self.tr_width * math.cos(beta2),
                       y2 + self.tr_width * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            c = 0.02 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.typename = 'tile'
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l],
                                   t.color))
            self.road.append(t)
            if border[i]:
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * self.tr_width * math.cos(beta1),
                        y1 + side * self.tr_width * math.sin(beta1))
                b1_r = (x1 + side * (self.tr_width + BORDER) * math.cos(beta1),
                        y1 + side * (self.tr_width + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * self.tr_width * math.cos(beta2),
                        y2 + side * self.tr_width * math.sin(beta2))
                b2_r = (x2 + side * (self.tr_width + BORDER) * math.cos(beta2),
                        y2 + side * (self.tr_width + BORDER) * math.sin(beta2))
                self.road_poly.append(
                    ([b1_l, b1_r, b2_r, b2_l],
                     (1, 1, 1) if i % 2 == 0 else BORDER_COLOR))

        # Create obstacle tiles
        if self.num_obstacles:
            self._create_obstacles()

    def _create_obstacles(self):
        """Create one sensor body and render polygon per obstacle tile id.

        Each obstacle covers half the track width on one side of the
        centerline; a negative tile id selects the left-hand side.
        """
        count = 1
        self.obstacles_poly = []
        width = self.tr_width / 2
        obst_len = 3 if self.oily_patch else 1
        n_tiles = len(self.track)
        for idx in self.obstacle_tiles_ids:
            left_side = idx < 0
            # Wrap around the closed loop so that a preset location close to
            # 100% of the track does not index past the last tile.
            idx = abs(idx) % n_tiles
            alpha1, beta1, x1, y1 = self.track[idx]
            alpha2, beta2, x2, y2 = self.track[(idx + obst_len) % n_tiles]
            if left_side:
                p1 = (x1 - width * math.cos(beta1),
                      y1 - width * math.sin(beta1))
                p2 = (x1, y1)
                p3 = (x2, y2)
                p4 = (x2 - width * math.cos(beta2),
                      y2 - width * math.sin(beta2))
            else:
                p1 = (x1, y1)
                p2 = (x1 + width * math.cos(beta1),
                      y1 + width * math.sin(beta1))
                p3 = (x2 + width * math.cos(beta2),
                      y2 + width * math.sin(beta2))
                p4 = (x2, y2)
            vertices = [p1, p2, p3, p4]

            # Add it to obstacles, add it to poly_obstacles
            t = self.world.CreateStaticBody(fixtures=fixtureDef(
                shape=polygonShape(vertices=vertices)))
            t.userData = t
            if self.oily_patch:
                t.color = OILY_COLOR
                t.road_friction = 0.2
            else:
                t.color = OBSTACLE_COLOR
                t.road_friction = 1.0
            t.typename = 'obstacle'
            t.road_visited = False
            t.id = count
            t.tile_id = idx
            t.fixtures[0].sensor = True
            self.road.append(t)
            self.obstacles_poly.append((vertices, t.color))
            count += 1

    def _closest_node(self, node, nodes):
        """Return the index of the row of ``nodes`` closest to ``node``."""
        deltas = nodes - node
        dist_2 = np.einsum('ij,ij->i', deltas, deltas)
        return np.argmin(dist_2)

    def _closest_dist(self, node, nodes):
        """Return the Euclidean distance from ``node`` to its closest row."""
        deltas = nodes - node
        dist_2 = np.einsum('ij,ij->i', deltas, deltas)
        # ndarray.min() instead of the builtin min(): same value, no Python
        # level iteration over the array.
        return np.sqrt(dist_2.min())

    def _render_road(self):
        """Draw the grass, the road polygons and the obstacles as GL quads."""
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(GRASS_COLOR[0], GRASS_COLOR[1], GRASS_COLOR[2], 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(GRASS_COLOR[0] - 0, GRASS_COLOR[1] + 0.1,
                     GRASS_COLOR[2] - 0, 1.0)
        k = PLAYFIELD / 20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        if self.num_obstacles > 0:
            self._render_obstacles()
        gl.glEnd()

    def _render_obstacles(self):
        """Emit the obstacle quads.

        Can only be called inside a glBegin/glEnd pair.
        """
        for poly, color in self.obstacles_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)

    def _render_indicators(self, W, H):
        """Draw the bottom info panel (speed, steering, rotation, return)."""

        def vertical_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        s = W / 4  # horizontal slot separation
        h = H / 40.0

        # Black bar, 5 x h height
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        # 3 horizontal indicators
        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        horiz_ind(1.0, 0.015 * true_speed, (1, 1, 1))
        horiz_ind(2.5, -1 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(3.5,
                  np.clip(-0.03 * self.car.hull.angularVelocity, -0.4, 0.4),
                  (1, 1, 0))
        gl.glEnd()

        # Accumulated episode return
        self.score_label.text = "%02.1f" % self.ep_return
        self.score_label.draw()

    def reset(self):
        """Start a new episode and return the first observation.

        A new track is generated once the current one has been used
        ``repeat_track * episodes_per_track`` times; otherwise the tiles of
        the current track are rebuilt and the car starts at the next of the
        evenly spaced starting tiles.
        """
        self.ep_return = 0.0
        self.newtile = False
        self.tile_visited_count = 0
        self.last_touch_with_track = 0
        self.last_new_tile = 0
        self.obst_contact = False
        self.obst_contact_count = 0
        self.obst_contact_list = []
        self.t = 0.0
        self.steps_in_episode = 0
        self.state = np.zeros(self.observation_space.shape)
        self.internal_frames = self.skip_frames * (self.frames_per_state -
                                                   1) + 1
        self.int_state = np.zeros([STATE_H, STATE_W, self.internal_frames])

        if self.track_use >= self.repeat_track * self.episodes_per_track:
            attempt = 0
            while attempt < 21:
                success = self._create_track()
                attempt += 1
                if success:
                    self.track_use = 0
                    self.episode_start = range(
                        0, len(self.track),
                        int(len(self.track) / self.episodes_per_track))
                    break
                if self.verbose > 0:
                    print(
                        attempt,
                        " retry to generate new track (normal below 10, limit 20)"
                    )
        else:
            self._create_tiles(self.track, self.border)

        start_tile = self.episode_start[self.track_use %
                                        self.episodes_per_track]

        if self.car is not None:
            self.car.destroy()
        if self.episodes_per_track > 1:
            self.car = Car(self.world, *self.track[start_tile][1:4])
        else:
            self.car = Car(self.world, *self.track[0][1:4])

        # Trying to detect two very close reset() calls: the track usage
        # only advances when more than two actions were taken in between.
        if self.action_taken > 2:
            self.track_use += 1
            self.action_taken = 0

        return self.step(None)[0]

    def reset_track(self):
        """Force generation of a new track and return an observation."""
        self.track_use = +np.inf
        self.reset()
        # NOTE(review): reset() has already performed a step(None); this
        # second call is kept to preserve the existing behavior.
        return self.step(None)[0]

    def _report_episode_end(self, message, step_reward):
        """Print the episode-end summary line when verbosity allows it."""
        if self.verbose > 0:
            print(self.track_use, message, self.steps_in_episode, " %advance",
                  int(self.tile_visited_count / len(self.track) * 1000) / 10,
                  " played reward",
                  int(100 * self.ep_return) / 100, " last penalty",
                  step_reward)

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: index into the discretization table when one is
                configured, otherwise a ``[steer, gas, brake]`` sequence.
                ``None`` (used by ``reset``) only refreshes the observation.

        Returns:
            ``(state, step_reward, done, info)``; ``info`` is an empty dict.

        Reward coefficients read from ``f_reward``: [0] every step,
        [1] closeness to the centerline, [2] speed, [3] steering angle,
        [4] new tile, [5] tiles visited per step, [6] track finished,
        [7] patience / off-track time exceeded, [8] out of the playfield,
        [9] step limit reached, [10] obstacle contact (each step),
        [11] episode ended by obstacle contact.
        """
        # Avoid first step with action=None, called from reset()
        if action is None:
            self.car.steer(0)
            self.car.step(0)
            self.world.Step(0, 6 * 30, 2 * 30)
        else:
            if self.discre is not None:
                action = self.discre[action]
            # Moves the car per action, advances time
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])
            self.t += 1.0 / FPS
            self.steps_in_episode += 1
            self.action_taken += 1
            self.car.step(1.0 / FPS)
            self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)

        # Generates new observation state
        self._update_state(self.render("state_pixels"))

        # REWARDS
        x, y = self.car.hull.position
        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1])) / 100
        wheel_angle = abs(self.car.wheels[0].joint.angle) / 0.4
        done = False

        # Reward given each step: step taken, distance to centerline,
        # speed, steer angle, obstacle contact
        step_reward = self.f_reward[0]
        # Distance to centerline, proportional to track width
        dist = 1 - self._closest_dist([x, y], self.waypoints) / self.tr_width
        step_reward += self.f_reward[1] * np.clip(dist, -1, 1)
        step_reward += self.f_reward[2] * true_speed
        step_reward += self.f_reward[3] * wheel_angle
        step_reward += self.f_reward[10] * self.obst_contact

        # Reward given on new tile touched: proportional to advance and to
        # tiles visited per step taken
        if self.newtile:
            step_reward += self.f_reward[4] * 100 / len(self.track)
            # steps_in_episode is still 0 during the reset step; guard the
            # division in case a tile contact is reported that early.
            step_reward += (self.f_reward[5] * self.tile_visited_count /
                            max(self.steps_in_episode, 1))
            self.newtile = False

        # Episode-ending conditions; a later condition overrides the reward
        # set by an earlier one.
        if self.end_on_contact and self.obst_contact:
            step_reward = self.f_reward[11]
            done = True
            self._report_episode_end(" ended by collision. Steps",
                                     step_reward)
            if self.verbose > 2:
                print(self.obst_contact_count, " collided obstacles: ",
                      self.obst_contact_list)

        # Too many seconds lacking progress
        if self.t - self.last_new_tile > self.patience:
            step_reward = self.f_reward[7]
            done = True
            self._report_episode_end(" cut by time without progress. Steps",
                                     step_reward)

        # Too many seconds off-track
        if self.t - self.last_touch_with_track > self.off_track:
            step_reward = self.f_reward[7]
            done = True
            self._report_episode_end(" cut by time off-track. Steps",
                                     step_reward)

        # Out-of-bounds car position
        if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
            step_reward = self.f_reward[8]
            done = True
            self._report_episode_end(" out of limits. Steps", step_reward)

        # Episode limit, as registered
        if self.steps_in_episode >= 2000:
            step_reward = self.f_reward[9]
            done = True
            self._report_episode_end(" env max steps reached", step_reward)

        # Touched all tiles, track finished
        if self.tile_visited_count == len(self.track):
            step_reward = self.f_reward[6]
            done = True
            if self.verbose > 0:
                print(self.track_use, " Finalized in Steps",
                      self.steps_in_episode, " with return=total_reward",
                      self.ep_return + step_reward)

        # Clear reward if no action intended, from reset
        if action is None:
            step_reward = 0
            done = False

        # Internal counting reward, for display
        self.ep_return += step_reward

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Render the scene.

        Args:
            mode: 'human' draws to the window and returns whether it is
                still open; 'rgb_array' and 'state_pixels' return the frame
                as an array (processed according to the color option).

        Returns:
            ``None`` when ``reset()`` has not been called yet, otherwise as
            described for ``mode``.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label(
                '00.0',
                font_size=24,
                x=10,
                y=WINDOW_H * 2.5 / 40.00,
                anchor_x='left',
                anchor_y='center',
                color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        if ZOOM_START:  # Animate zoom during first second
            zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
                self.t, 1)
        else:
            zoom = ZOOM * SCALE

        if TRACK_FIRST and self.t == 0:
            # Shows whole track in first frame; checks first step, from
            # reset()
            self.transform.set_scale(TRACK_ZOOM, TRACK_ZOOM)
            self.transform.set_translation(WINDOW_W / 2, WINDOW_H / 2)
            self.transform.set_rotation(0)
        else:
            # Every regular step updates the car visualization after action
            scroll_x = self.car.hull.position[0]
            scroll_y = self.car.hull.position[1]
            angle = -self.car.hull.angle
            vel = self.car.hull.linearVelocity
            if np.linalg.norm(vel) > 0.5:
                angle = math.atan2(vel[0], vel[1])
            self.transform.set_scale(zoom, zoom)
            self.transform.set_translation(
                WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                                scroll_y * zoom * math.sin(angle)),
                WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                                scroll_y * zoom * math.cos(angle)))
            self.transform.set_rotation(angle)

        # The second argument is False only for state_pixels frames.
        self.car.draw(self.viewer, mode != "state_pixels")

        arr = None
        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()
        win.clear()
        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view(
                ).backingScaleFactor()  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self._render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()

        # Plots the indicators
        if self.indicators and (not TRACK_FIRST or self.t >= 1.0 / FPS):
            self._render_indicators(WINDOW_W, WINDOW_H)

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.frombuffer replaces the deprecated binary mode of
        # np.fromstring; the copy keeps the returned frame writable.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8).copy()
        arr = arr.reshape(VP_H, VP_W, 4)
        if self.grayscale == 1:
            if self.frames_per_state > 1:
                arr = np.dot(arr[::-1, :, 0:3], [0.299, 0.587, 0.114])
            else:
                arr = np.dot(arr[::-1, :, 0:3],
                             [0.299, 0.587, 0.114]).reshape(VP_H, VP_W, -1)
        elif self.grayscale == 2:
            if self.frames_per_state > 1:
                arr = arr[::-1, :, 1]
            else:
                arr = arr[::-1, :, 1].reshape(VP_H, VP_W, -1)
        else:
            arr = arr[::-1, :, 0:3]

        return arr

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def screenshot(self, dest="./", name=None, quality='low'):
        """Save the current observation or a freshly rendered frame as JPEG.

        Args:
            dest: destination directory.
            name: file name without extension; defaults to
                ``screenshot_<time>``.
            quality: 'low' saves the current state (one file per stacked
                frame); any other value saves one 'rgb_array' render.
        """
        if quality == 'low':
            state = self.state
        else:
            # render() has no "HD" mode (its assert rejects it), so every
            # non-low quality uses the largest supported one.
            state = self.render('rgb_array')
        if state is None:
            return

        if name is None:
            name = "screenshot_%0.3f" % self.t

        # Only the stacked low-quality state holds several distinct frames.
        per_frame = quality == 'low' and self.frames_per_state > 1
        for f in range(self.frames_per_state if per_frame else 1):
            if per_frame:
                frame_str = "_frame%i" % f
                frame = state[:, :, f]
            else:
                frame_str = ""
                frame = state
            if self.grayscale:
                # Drop a trailing single channel so the stack below yields
                # (H, W, 3) rather than (H, W, 1, 3).
                if frame.ndim == 3 and frame.shape[-1] == 1:
                    frame = frame[:, :, 0]
                frame = np.stack([frame, frame, frame], axis=-1)
            frame = frame.astype(np.uint8)
            im = Image.fromarray(frame)
            im.save("%s/%s%s.jpeg" % (dest, name, frame_str))
class CarRacing(gym.Env, EzPickle): metadata = { "render.modes": ["human", "rgb_array", "state_pixels", "track_vertex"], "video.frames_per_second": FPS, } def __init__(self, verbose=1): EzPickle.__init__(self) self.seed() self.contactListener_keepref = FrictionDetector(self) self.world = Box2D.b2World( (0, 0), contactListener=self.contactListener_keepref) self.viewer = None self.invisible_state_window = None self.invisible_video_window = None self.road = None self.grass = [] self.on_grass_idx = set() self.car = None self.reward = 0.0 self.prev_reward = 0.0 self.verbose = verbose self.poly = {'grass': [], 'road': [], 'other': []} self.action_space = spaces.Box(np.array([-1, 0, 0]), np.array([+1, +1, +1]), dtype=np.float32) # steer, gas, brake self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8) self.timer = 0 def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _destroy(self): if not self.road: return for t in self.road: self.world.DestroyBody(t) self.road = [] for t in self.grass: self.world.DestroyBody(t) self.grass = [] self.grass_idx = None self.car.destroy() def _create_track(self): CHECKPOINTS = 12 # Create checkpoints checkpoints = [] for c in range(CHECKPOINTS): alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform( 0, 2 * math.pi * 1 / CHECKPOINTS) rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD) if c == 0: alpha = 0 rad = 1.5 * TRACK_RAD if c == CHECKPOINTS - 1: alpha = 2 * math.pi * c / CHECKPOINTS self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS rad = 1.5 * TRACK_RAD checkpoints.append( (alpha, rad * math.cos(alpha), rad * math.sin(alpha))) self.road = [] # Go from one checkpoint to another to create track x, y, beta = 1.5 * TRACK_RAD, 0, 0 dest_i = 0 laps = 0 track = [] no_freeze = 2500 visited_other_side = False while True: alpha = math.atan2(y, x) if visited_other_side and alpha > 0: laps += 1 visited_other_side = False if alpha < 0: 
visited_other_side = True alpha += 2 * math.pi while True: # Find destination from checkpoints failed = True while True: dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)] if alpha <= dest_alpha: failed = False break dest_i += 1 if dest_i % len(checkpoints) == 0: break if not failed: break alpha -= 2 * math.pi continue r1x = math.cos(beta) r1y = math.sin(beta) p1x = -r1y p1y = r1x dest_dx = dest_x - x # vector towards destination dest_dy = dest_y - y proj = r1x * dest_dx + r1y * dest_dy # destination vector projected on rad while beta - alpha > 1.5 * math.pi: beta -= 2 * math.pi while beta - alpha < -1.5 * math.pi: beta += 2 * math.pi prev_beta = beta proj *= SCALE if proj > 0.3: beta -= min(TRACK_TURN_RATE, abs(0.001 * proj)) if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001 * proj)) x += p1x * TRACK_DETAIL_STEP y += p1y * TRACK_DETAIL_STEP track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y)) if laps > 4: break no_freeze -= 1 if no_freeze == 0: break # Find closed loop range i1..i2, first loop should be ignored, second is OK i1, i2 = -1, -1 i = len(track) while True: i -= 1 if i == 0: return False # Failed pass_through_start = (track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha) if pass_through_start and i2 == -1: i2 = i elif pass_through_start and i1 == -1: i1 = i break if self.verbose == 1: print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1)) assert i1 != -1 assert i2 != -1 track = track[i1:i2 - 1] first_beta = track[0][1] first_perp_x = math.cos(first_beta) first_perp_y = math.sin(first_beta) # Length of perpendicular jump to put together head and tail well_glued_together = np.sqrt( np.square(first_perp_x * (track[0][2] - track[-1][2])) + np.square(first_perp_y * (track[0][3] - track[-1][3]))) if well_glued_together > TRACK_DETAIL_STEP: return False # Red-white border on hard turns border = [False] * len(track) for i in range(len(track)): good = True oneside = 0 for neg in 
range(BORDER_MIN_COUNT): beta1 = track[i - neg - 0][1] beta2 = track[i - neg - 1][1] good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2 oneside += np.sign(beta1 - beta2) good &= abs(oneside) == BORDER_MIN_COUNT border[i] = good for i in range(len(track)): for neg in range(BORDER_MIN_COUNT): border[i - neg] |= border[i] # Create tiles for i in range(len(track)): alpha1, beta1, x1, y1 = track[i] alpha2, beta2, x2, y2 = track[i - 1] # Grass fixtures road1_l = (x1 - TRACK_GRASS_WIDTH * math.cos(beta1), y1 - TRACK_GRASS_WIDTH * math.sin(beta1)) road1_r = (x1 + TRACK_GRASS_WIDTH * math.cos(beta1), y1 + TRACK_GRASS_WIDTH * math.sin(beta1)) road2_l = (x2 - TRACK_GRASS_WIDTH * math.cos(beta2), y2 - TRACK_GRASS_WIDTH * math.sin(beta2)) road2_r = (x2 + TRACK_GRASS_WIDTH * math.cos(beta2), y2 + TRACK_GRASS_WIDTH * math.sin(beta2)) self.poly['grass'].append(([road1_l, road1_r, road2_r, road2_l], GRASS_COLOR)) t = self.world.CreateStaticBody( fixtures=fixtureDef(shape=polygonShape( vertices=[road1_l, road1_r, road2_r, road2_l]), isSensor=True)) t.userData = t t.userData.grass_idx = len(self.grass) self.grass.append(t) # Road fixtures road1_l = (x1 - TRACK_WIDTH * math.cos(beta1), y1 - TRACK_WIDTH * math.sin(beta1)) road1_r = (x1 + TRACK_WIDTH * math.cos(beta1), y1 + TRACK_WIDTH * math.sin(beta1)) road2_l = (x2 - TRACK_WIDTH * math.cos(beta2), y2 - TRACK_WIDTH * math.sin(beta2)) road2_r = (x2 + TRACK_WIDTH * math.cos(beta2), y2 + TRACK_WIDTH * math.sin(beta2)) t = self.world.CreateStaticBody( fixtures=fixtureDef(shape=polygonShape( vertices=[road1_l, road1_r, road2_r, road2_l]), isSensor=True)) t.userData = t t.road_visited = False t.road_friction = 1.0 # Vary the colour of the road c = 0.01 * (i % 3) t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c] self.poly['road'].append(([road1_l, road1_r, road2_r, road2_l], t.color)) self.road.append(t) # Red/white borders (only occur in the render) if border[i]: side = np.sign(beta2 - beta1) b1_l = (x1 + side * 
TRACK_WIDTH * math.cos(beta1), y1 + side * TRACK_WIDTH * math.sin(beta1)) b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1), y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1)) b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2), y2 + side * TRACK_WIDTH * math.sin(beta2)) b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2), y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2)) self.poly['other'].append(([b1_l, b1_r, b2_r, b2_l], (1, 1, 1) if i % 2 == 0 else (1, 0, 0))) self.track = track return True def reset(self): self._destroy() self.reward = 0.0 self.prev_reward = 0.0 self.tile_visited_count = 0 self.t = 0.0 self.poly = {'grass': [], 'road': [], 'other': []} self.on_grass_idx.clear() while True: success = self._create_track() if success: break if self.verbose == 1: print( "retry to generate track (normal if there are not many instances of this message)" ) self.car = Car(self.world, *self.track[0][1:4]) return self.step(None)[0] def step(self, action): if action is not None: self.car.steer(-action[0]) self.car.gas(action[1]) self.car.brake(action[2]) self.car.step(1.0 / FPS) self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) self.t += 1.0 / FPS self.state = self.render("state_pixels") step_reward = 0 done = False if action is not None: # First step without action, called from reset() self.reward -= 0.1 # We actually don't want to count fuel spent, we want car to be faster. # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER self.car.fuel_spent = 0.0 step_reward = self.reward - self.prev_reward self.prev_reward = self.reward if self.tile_visited_count == len(self.track): done = True x, y = self.car.hull.position if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD or len( self.on_grass_idx) == 0: done = True step_reward = -100 return self.state, step_reward, done, {} def render(self, mode='human'): assert mode in ["human", "state_pixels", "rgb_array", "track_vertex"] # Note that the verticies are arranged in order of how close each tile is. 
# But there is no guarantee in the order of which side (left or right) is returned. # And the first tile returned may include the both right and then both left vertices of the quad visible_road_vertices = self.getRoadVertices() #if self.car is not None and self.car.hull is not None: #local_v = np.array([self.car.hull.GetLocalPoint(v) for v in visible_road_vertices]) #local_v = np.c_[local_v, np.linalg.norm(local_v, axis=1)] #if self.timer % 20 == 0: # np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)}) # print(local_v) #self.timer += 1 if mode == "track_vertex": if self.car is None or self.car.hull is None: return None local_v = np.array([ self.car.hull.GetLocalPoint(v) for v in visible_road_vertices ]) # Return two vectors of each side of the track relative to the frame of the car # Also return the following wheel parameters: steer, gas, brake, speed and vehicle speed (forward and sideways) arr = np.array([ self.car.wheels[0].steer, self.car.wheels[2].gas, self.car.wheels[0].brake, self.car.wheels[0].vr, self.car.wheels[1].vr, self.car.wheels[2].vr, self.car.wheels[3].vr, self.car.wheels[2].vf, self.car.wheels[2].vs ]) arr = np.r_[arr, local_v[:, 0], local_v[:, 1]] return arr if self.viewer is None: from gym.envs.classic_control import rendering self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H) self.score_label = pyglet.text.Label('0000', font_size=36, x=20, y=WINDOW_H * 2.5 / 40.00, anchor_x='left', anchor_y='center', color=(255, 255, 255, 255)) self.transform = rendering.Transform() if not ZOOM_FOLLOW: zoom = WINDOW_H / (2 * PLAYFIELD) self.transform.set_scale(zoom, zoom) self.transform.set_translation(WINDOW_W / 2, WINDOW_H / 2) self.transform.set_rotation(0) if "t" not in self.__dict__: return # reset() not called yet if ZOOM_FOLLOW: # Zoom starts at 0.1*SCALE and ends at ZOOM*SCALE zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min( self.t, 1) # Animate zoom first second scroll_x = self.car.hull.position[0] scroll_y = 
self.car.hull.position[1] angle = -self.car.hull.angle vel = self.car.hull.linearVelocity if np.linalg.norm(vel) > 0.5: angle = math.atan2(vel[0], vel[1]) self.transform.set_scale(zoom, zoom) self.transform.set_translation( WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) - scroll_y * zoom * math.sin(angle)), WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) + scroll_y * zoom * math.cos(angle))) self.transform.set_rotation(angle) self.car.draw(self.viewer, mode != "state_pixels") arr = None win = self.viewer.window win.switch_to() win.dispatch_events() # scale the viewport to output at different sizes for ML win.clear() t = self.transform if mode == 'rgb_array': VP_W = VIDEO_W VP_H = VIDEO_H elif mode == 'state_pixels': VP_W = STATE_W VP_H = STATE_H else: pixel_scale = 1 # an NSOpenGLContext seems to be something in Mac # The following line is trying to scale based on Mac's scaling settings if hasattr(win.context, '_nscontext'): pixel_scale = win.context._nscontext.view().backingScaleFactor( ) # pylint: disable=protected-access VP_W = int(pixel_scale * WINDOW_W) VP_H = int(pixel_scale * WINDOW_H) gl.glViewport(0, 0, VP_W, VP_H) t.enable() self.render_road(visible_road_vertices) for geom in self.viewer.onetime_geoms: geom.render() self.viewer.onetime_geoms = [] t.disable() self.render_indicators(WINDOW_W, WINDOW_H) if mode == 'human': win.flip() return self.viewer.isopen image_data = pyglet.image.get_buffer_manager().get_color_buffer( ).get_image_data() arr = np.fromstring(image_data.get_data(), dtype=np.uint8, sep='') arr = arr.reshape(VP_H, VP_W, 4) arr = arr[::-1, :, 0:3] return arr def getRoadVertices(self): vertices = [] vertices2 = [] if self.road is not None and len(self.on_grass_idx) > 0: tile_idx = max(self.on_grass_idx) for i, t in enumerate(self.road): if tile_idx is None: break if i >= tile_idx and i < tile_idx + LOOK_AHEAD: vertices.extend(t.fixtures[0].shape.vertices) if i < (tile_idx + LOOK_AHEAD) - len(self.road): # We're adding this separately as 
we need to ensure these get added to the end of the list vertices2.extend(t.fixtures[0].shape.vertices) vertices.extend(vertices2) # Remove duplicates in the road vertices # This works because dictionaries can't have duplicate keys vertices = list(dict.fromkeys(vertices)) return vertices def close(self): if self.viewer is not None: self.viewer.close() self.viewer = None def render_road(self, visible_vertices): # Draw background colors = [0.1, 0.1, 0.1, 1.0] * 4 polygons_ = [ +PLAYFIELD, +PLAYFIELD, 0, +PLAYFIELD, -PLAYFIELD, 0, -PLAYFIELD, -PLAYFIELD, 0, -PLAYFIELD, +PLAYFIELD, 0, ] # Draw grass if len(self.on_grass_idx) == 0: grass_idx = None # this only occurs just before a reset when you drive off the track else: grass_idx = max(self.on_grass_idx) for i, (poly, color) in enumerate(self.poly['grass']): if grass_idx is not None and ( (i >= grass_idx and i < grass_idx + LOOK_AHEAD) or i < (grass_idx + LOOK_AHEAD) - len(self.poly['grass'])): color = VISIBLE_ROAD_COLOR colors.extend([*color, 1] * len(poly)) for p in poly: polygons_.extend([p[0], p[1], 0]) # Draw road and other items (like the red/white borders) for key in ['road', 'other']: for poly, color in self.poly[key]: colors.extend([*color, 1] * len(poly)) for p in poly: polygons_.extend([p[0], p[1], 0]) vl = pyglet.graphics.vertex_list( len(polygons_) // 3, ("v3f", polygons_), ("c4f", colors)) vl.draw(gl.GL_QUADS) # Draw the visible vertices colors = [] points = [] for i, v in enumerate(visible_vertices): colors.extend([1 - i / len(visible_vertices), 0, 0, 1.0]) points.extend([v[0], v[1], 0]) v2 = pyglet.graphics.vertex_list( len(points) // 3, ("v3f", points), ("c4f", colors)) pyglet.gl.glPointSize(5) v2.draw(gl.GL_POINTS) pyglet.gl.glPointSize(1) def render_indicators(self, W, H): s = W / 40.0 h = H / 40.0 colors = [0, 0, 0, 1] * 4 polygons = [W, 0, 0, W, 5 * h, 0, 0, 5 * h, 0, 0, 0, 0] def vertical_ind(place, val, color): colors.extend([color[0], color[1], color[2], 1] * 4) polygons.extend([ place * s, h + 
h * val, 0, (place + 1) * s, h + h * val, 0, (place + 1) * s, h, 0, (place + 0) * s, h, 0, ]) def horiz_ind(place, val, color): colors.extend([color[0], color[1], color[2], 1] * 4) polygons.extend([(place + 0) * s, 4 * h, 0, (place + val) * s, 4 * h, 0, (place + val) * s, 2 * h, 0, (place + 0) * s, 2 * h, 0]) true_speed = np.sqrt( np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1])) vertical_ind(5, 0.02 * true_speed, (1, 1, 1)) vertical_ind(7, 0.01 * self.car.wheels[0].omega, (0.0, 0, 1)) # ABS sensors vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1)) vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1)) vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1)) horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0)) horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0)) vl = pyglet.graphics.vertex_list( len(polygons) // 3, ("v3f", polygons), ("c4f", colors) # gl.GL_QUADS, ) vl.draw(gl.GL_QUADS) self.score_label.text = "%04i" % self.reward self.score_label.draw()
class CarRacingFix:
    """Car-racing environment variant that advances two physics frames per step.

    Each call to :meth:`step` takes a 6-element action: the first three values
    (steer, gas, brake) drive an un-rendered physics frame and the last three
    drive a rendered one.  The returned observation is the previous rendered
    frame concatenated with the newest one along the channel axis, i.e. an
    array of shape ``(STATE_H, STATE_W, 6)``.

    The class does not inherit from ``gym.Env``; it only exposes the attributes
    set at the end of ``__init__`` (``env_name``, ``state_dim``, ``action_dim``,
    ``if_discrete``, ``max_step``, ``target_return``, ``action_max``).
    """

    # Compare the version numerically.  A plain string comparison is
    # lexicographic ('0.9.0' > '0.17.1'), which rejects old releases and would
    # accept e.g. '0.100'.  Non-numeric components (such as 'rc1' suffixes) are
    # ignored.
    assert tuple(
        int(part) for part in gym.__version__.split('.')[:3] if part.isdigit()
    ) <= (0, 17, 1)

    def __init__(self, verbose=1):
        """Create the Box2D world and initialise the bookkeeping attributes.

        Args:
            verbose: stored on the instance as ``self.verbose``; nothing in
                this class reads it.
        """
        # The listener is kept on the instance as well as handed to the world
        # (the attribute name suggests this is to keep a reference alive).
        self.contactListener_keep_ref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keep_ref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        # Template fixture; its vertices are overwritten for every road tile.
        self.fd_tile = Box2D.b2FixtureDef(shape=Box2D.b2PolygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        self.state_temp = None  # previous rendered frame, set in reset()
        self.tile_visited_count = 0
        self.road_poly = []
        self.transform = None
        self.t = None  # simulated time in seconds; None until reset()
        self.num_step = 0

        self.env_name = 'CarRacingFix'
        # Frames are produced by render() as (height, width, channels); two RGB
        # frames are stacked on the channel axis.  Identical to the former
        # (STATE_W, STATE_H, 6) whenever both constants are equal.
        self.state_dim = (STATE_H, STATE_W, 3 * 2)
        self.action_dim = 6
        self.if_discrete = False
        self.max_step = 512
        self.target_return = 950
        self.action_max = 1

    def reset(self):
        """Start a new episode on a freshly generated track.

        Returns:
            The first stacked observation (an all-zero frame concatenated with
            the first rendered frame), obtained by stepping once with the
            neutral action ``(0, 0, 0)``.
        """
        self.num_step = 1
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []

        # Track generation can fail; retry until it succeeds.
        while not self._create_track():
            pass
        self.car = Car(self.world, *self.track[0][1:4])

        # Same (height, width, channels) layout as the frames from render().
        self.state_temp = np.zeros((STATE_H, STATE_W, 3), dtype=np.uint8)
        return self.old_step((0, 0, 0))[0]

    def step(self, action):
        """Advance two physics frames with a 6-element action.

        Args:
            action: indexable with at least six values; ``action[:3]`` is
                applied to an un-rendered frame and ``action[3:]`` to a
                rendered one.

        Returns:
            ``(state, reward, done, info_dict)`` where ``reward`` is the sum of
            the two per-frame rewards.  If anything raises while stepping, the
            error is printed and a terminal transition with reward 0 is
            returned instead (deliberate best-effort behaviour).
        """
        try:
            reward0 = self.old_step(action[:3], if_draw=False)[1]
            state, reward1, done, info_dict = self.old_step(action[3:],
                                                            if_draw=True)
            reward = reward0 + reward1
        except Exception as error:
            print(f"| CarRacingFix Error: {error}")
            last_frame = self.state_temp
            if last_frame is None:  # step() was called before reset()
                last_frame = np.zeros((STATE_H, STATE_W, 3), dtype=np.uint8)
            # Must have the same (H, W, 6) layout as the normal path, which
            # concatenates on the channel axis; np.stack would give (2, H, W, 3).
            state = np.concatenate((last_frame, last_frame), axis=2)
            reward = 0
            done = True
            info_dict = dict()

        self.num_step += 1
        # '>=' so the cap still triggers if max_step is lowered mid-episode.
        if self.num_step >= self.max_step:
            done = True
        return state, reward, done, info_dict

    def old_step(self, action, if_draw=True):
        """Advance one physics frame with a (steer, gas, brake) action.

        Args:
            action: indexable with three values passed to ``car.steer``,
                ``car.gas`` and ``car.brake`` respectively.
            if_draw: when true, render the frame, run the termination checks
                and build the stacked observation; when false, only the
                physics and the reward bookkeeping are performed.

        Returns:
            ``(stack_state, step_reward, done, {})``.  ``stack_state`` is
            ``None`` when ``if_draw`` is false.
        """
        self.car.steer(action[0])
        self.car.gas(action[1])
        self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        # Constant per-frame penalty.  Fuel is intentionally not counted: the
        # goal is a faster car, not an economical one.
        self.reward -= 0.1

        done = False
        # The playfield-boundary termination of the stock environment is
        # intentionally not used here (the original author called it a bad
        # design); leaving the road is detected from the image instead.
        if if_draw:
            state = self.render("state_pixels")

            # Mean of the green channel over a fixed crop of the frame;
            # values outside (32, 211) are treated as being off the road.
            if not (32 < state[16:96, 16:96, 1].mean() < 211):
                self.reward -= 2.0
                done = True
            if self.tile_visited_count == len(self.track):
                done = True

            stack_state = np.concatenate((self.state_temp, state), axis=2)
            self.state_temp = state
        else:
            stack_state = None

        step_reward = self.reward - self.prev_reward
        self.prev_reward = self.reward
        return stack_state, step_reward, done, {}

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render(self, mode='human'):
        """Draw the current scene.

        Args:
            mode: ``'human'`` draws to the window; ``'state_pixels'`` draws at
                ``STATE_W x STATE_H`` and returns the pixels.

        Returns:
            ``None`` if :meth:`reset` has not been called yet, the viewer's
            ``isopen`` flag in ``'human'`` mode, otherwise a
            ``(STATE_H, STATE_W, 3)`` uint8 array.  Unlike the other
            environments in this file the rows are not flipped vertically.
        """
        assert mode in ['human', 'state_pixels']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.transform = rendering.Transform()

        if self.t is None:
            return None

        # Animate zoom during the first second.
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car is moving, orient the view along its velocity.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()
        win.clear()

        t = self.transform
        if mode == 'state_pixels':
            vp_w = STATE_W
            vp_h = STATE_H
        else:
            # Scale by the backing-store factor when the context exposes a
            # '_nscontext' attribute.
            context_nscontext = getattr(win.context, '_nscontext', None)
            pixel_scale = 1 if context_nscontext is None else context_nscontext.view(
            ).backingScaleFactor()  # pylint: disable=protected-access
            vp_w = int(pixel_scale * WINDOW_W)
            vp_h = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, vp_w, vp_h)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        # Indicators are intentionally not drawn in this variant.

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        # It yields a read-only view, so copy the RGB channels into a fresh
        # writable array (np.fromstring used to copy as well).
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        arr = arr.reshape((vp_h, vp_w, 4))[:, :, :3]
        return np.ascontiguousarray(arr)

    def render_road(self):
        """Draw the grass background, its checker pattern and all road quads."""
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD / 20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, w, h):
        """Draw the speed / wheel / steering bars along the bottom edge.

        Not called by :meth:`render` in this class.

        Args:
            w: width of the drawing area.
            h: height of the drawing area.
        """
        gl.glBegin(gl.GL_QUADS)
        s = w / 40.0
        h = h / 40.0
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(w, 0, 0)
        gl.glVertex3f(w, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()

    def _destroy(self):
        """Remove the road tiles and the car from the world (no-op if no road)."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and create its road tiles.

        Random checkpoints are placed around the origin, a path is traced
        from checkpoint to checkpoint, one closed lap is cut out of that path
        and a static sensor body is created for each segment of it.

        Returns:
            ``True`` on success (``self.track``, ``self.road`` and
            ``self.road_poly`` are populated) or ``False`` when no usable
            closed loop was found and the caller should retry.
        """
        check_point = 12

        # Create checkpoints as (angle, x, y) tuples.
        checkpoints = []
        for c in range(check_point):
            alpha = 2 * math.pi * c / check_point + rd.uniform(
                0, 2 * math.pi * 1 / check_point)
            rad = rd.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == check_point - 1:
                alpha = 2 * math.pi * c / check_point
                self.start_alpha = 2 * math.pi * (-0.5) / check_point
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of generated points
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha >= track[i - 1][0]
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns.  Negative indices wrap around, which
        # is intended because the track is a closed loop.
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles: one quad between each point and its predecessor.
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # Vary the colour slightly so neighbouring tiles are distinguishable.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l],
                                   t.color))
            self.road.append(t)
            if border[i]:
                # Red/white kerb; it is only added to the render list.
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l],
                                       (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True
class CarRacing(gym.Env, EzPickle):
    """Car-racing environment with discrete actions and stacked grayscale frames.

    The action space has five discrete actions, listed in
    ``possible_actions`` and mapped to (steer, gas, brake) triples by
    :meth:`_transform_action`.  The observation is ``self.state``, an array of
    shape ``(STATE_H, STATE_W, 4)`` holding the four most recent grayscale
    frames with the newest frame in channel 0.

    An episode ends when all track tiles were visited, or with a reward of
    -100 when the car leaves the playfield, the accumulated reward leaves
    ``[-200, 1000]``, or no positive step reward was seen during the last
    ``2 * FPS`` steps.
    """

    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second': FPS
    }

    def __init__(self, verbose=1):
        """Create the Box2D world and declare the action/observation spaces.

        Args:
            verbose: when equal to 1, progress and termination messages are
                printed.
        """
        # EzPickle re-creates the object from the recorded constructor
        # arguments, so `verbose` has to be passed on to survive pickling.
        EzPickle.__init__(self, verbose)
        self.seed()
        self.contactListener_keepref = FrictionDetector(self)
        self.world = Box2D.b2World(
            (0, 0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.car = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.verbose = verbose
        # Polygons to draw; rebuilt by reset() / _create_track().
        self.road_poly = []
        self.possible_actions = ("NOTHING", "LEFT", "RIGHT", "ACCELERATE",
                                 "BREAK")
        # Template fixture; its vertices are overwritten for every road tile.
        self.fd_tile = fixtureDef(shape=polygonShape(
            vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]))

        # Discrete action space
        self.action_space = spaces.Discrete(len(self.possible_actions))

        # Frames per state
        frames_per_state = 4
        state_shape = (STATE_H, STATE_W, frames_per_state)

        # Channel permutation applied after each new frame: the last channel
        # moves to the front and the others shift back by one, i.e.
        # [3, 0, 1, 2] for four frames.
        self._update_index = [frames_per_state - 1,
                              *range(frames_per_state - 1)]
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=state_shape,
                                            dtype=np.uint8)
        self.state = np.zeros(self.observation_space.shape)

        # No reward early abort: size of the window of recent step rewards.
        self._last_rewards_size = 2 * FPS
        self._last_rewards = []

    def seed(self, seed=None):
        """Seed the environment's random generator and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        """Remove the road tiles and the car from the world (no-op if no road)."""
        if not self.road:
            return
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.car.destroy()

    def _create_track(self):
        """Generate a random closed track and create its road tiles.

        Random checkpoints are placed around the origin, a path is traced
        from checkpoint to checkpoint, one closed lap is cut out of that path
        and a static sensor body is created for each segment of it.

        Returns:
            ``True`` on success (``self.track``, ``self.road`` and
            ``self.road_poly`` are populated) or ``False`` when no usable
            closed loop was found and the caller should retry.
        """
        CHECKPOINTS = 12

        # Create checkpoints as (angle, x, y) tuples.
        checkpoints = []
        for c in range(CHECKPOINTS):
            alpha = 2 * math.pi * c / CHECKPOINTS + self.np_random.uniform(
                0, 2 * math.pi * 1 / CHECKPOINTS)
            rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
            if c == 0:
                alpha = 0
                rad = 1.5 * TRACK_RAD
            if c == CHECKPOINTS - 1:
                alpha = 2 * math.pi * c / CHECKPOINTS
                self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
                rad = 1.5 * TRACK_RAD
            checkpoints.append(
                (alpha, rad * math.cos(alpha), rad * math.sin(alpha)))

        self.road = []

        # Go from one checkpoint to another to create track
        x, y, beta = 1.5 * TRACK_RAD, 0, 0
        dest_i = 0
        laps = 0
        track = []
        no_freeze = 2500  # hard cap on the number of generated points
        visited_other_side = False
        while True:
            alpha = math.atan2(y, x)
            if visited_other_side and alpha > 0:
                laps += 1
                visited_other_side = False
            if alpha < 0:
                visited_other_side = True
                alpha += 2 * math.pi
            while True:  # Find destination from checkpoints
                failed = True
                while True:
                    dest_alpha, dest_x, dest_y = checkpoints[dest_i %
                                                             len(checkpoints)]
                    if alpha <= dest_alpha:
                        failed = False
                        break
                    dest_i += 1
                    if dest_i % len(checkpoints) == 0:
                        break
                if not failed:
                    break
                alpha -= 2 * math.pi
                continue
            r1x = math.cos(beta)
            r1y = math.sin(beta)
            p1x = -r1y
            p1y = r1x
            dest_dx = dest_x - x  # vector towards destination
            dest_dy = dest_y - y
            proj = r1x * dest_dx + r1y * dest_dy  # destination vector projected on rad
            # Keep beta within 1.5*pi of alpha.
            while beta - alpha > 1.5 * math.pi:
                beta -= 2 * math.pi
            while beta - alpha < -1.5 * math.pi:
                beta += 2 * math.pi
            prev_beta = beta
            proj *= SCALE
            if proj > 0.3:
                beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
            if proj < -0.3:
                beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
            x += p1x * TRACK_DETAIL_STEP
            y += p1y * TRACK_DETAIL_STEP
            track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
            if laps > 4:
                break
            no_freeze -= 1
            if no_freeze == 0:
                break

        # Find closed loop range i1..i2, first loop should be ignored, second is OK
        i1, i2 = -1, -1
        i = len(track)
        while True:
            i -= 1
            if i == 0:
                return False  # Failed
            pass_through_start = track[i][0] > self.start_alpha and track[
                i - 1][0] <= self.start_alpha
            if pass_through_start and i2 == -1:
                i2 = i
            elif pass_through_start and i1 == -1:
                i1 = i
                break
        if self.verbose == 1:
            print("Track generation: %i..%i -> %i-tiles track" %
                  (i1, i2, i2 - i1))
        assert i1 != -1
        assert i2 != -1

        track = track[i1:i2 - 1]

        first_beta = track[0][1]
        first_perp_x = math.cos(first_beta)
        first_perp_y = math.sin(first_beta)
        # Length of perpendicular jump to put together head and tail
        well_glued_together = np.sqrt(
            np.square(first_perp_x * (track[0][2] - track[-1][2])) +
            np.square(first_perp_y * (track[0][3] - track[-1][3])))
        if well_glued_together > TRACK_DETAIL_STEP:
            return False

        # Red-white border on hard turns.  Negative indices wrap around, which
        # is intended because the track is a closed loop.
        border = [False] * len(track)
        for i in range(len(track)):
            good = True
            oneside = 0
            for neg in range(BORDER_MIN_COUNT):
                beta1 = track[i - neg - 0][1]
                beta2 = track[i - neg - 1][1]
                good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
                oneside += np.sign(beta1 - beta2)
            good &= abs(oneside) == BORDER_MIN_COUNT
            border[i] = good
        for i in range(len(track)):
            for neg in range(BORDER_MIN_COUNT):
                border[i - neg] |= border[i]

        # Create tiles: one quad between each point and its predecessor.
        for i in range(len(track)):
            alpha1, beta1, x1, y1 = track[i]
            alpha2, beta2, x2, y2 = track[i - 1]
            road1_l = (x1 - TRACK_WIDTH * math.cos(beta1),
                       y1 - TRACK_WIDTH * math.sin(beta1))
            road1_r = (x1 + TRACK_WIDTH * math.cos(beta1),
                       y1 + TRACK_WIDTH * math.sin(beta1))
            road2_l = (x2 - TRACK_WIDTH * math.cos(beta2),
                       y2 - TRACK_WIDTH * math.sin(beta2))
            road2_r = (x2 + TRACK_WIDTH * math.cos(beta2),
                       y2 + TRACK_WIDTH * math.sin(beta2))
            vertices = [road1_l, road1_r, road2_r, road2_l]
            self.fd_tile.shape.vertices = vertices
            t = self.world.CreateStaticBody(fixtures=self.fd_tile)
            t.userData = t
            # Vary the colour slightly so neighbouring tiles are distinguishable.
            c = 0.01 * (i % 3)
            t.color = [ROAD_COLOR[0] + c, ROAD_COLOR[1] + c, ROAD_COLOR[2] + c]
            t.road_visited = False
            t.road_friction = 1.0
            t.fixtures[0].sensor = True
            self.road_poly.append(([road1_l, road1_r, road2_r, road2_l],
                                   t.color))
            self.road.append(t)
            if border[i]:
                # Red/white kerb; it is only added to the render list.
                side = np.sign(beta2 - beta1)
                b1_l = (x1 + side * TRACK_WIDTH * math.cos(beta1),
                        y1 + side * TRACK_WIDTH * math.sin(beta1))
                b1_r = (x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                        y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1))
                b2_l = (x2 + side * TRACK_WIDTH * math.cos(beta2),
                        y2 + side * TRACK_WIDTH * math.sin(beta2))
                b2_r = (x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                        y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2))
                self.road_poly.append(([b1_l, b1_r, b2_r, b2_l],
                                       (1, 1, 1) if i % 2 == 0 else (1, 0, 0)))
        self.track = track
        return True

    def reset(self):
        """Start a new episode on a freshly generated track.

        Returns:
            The stacked-frame observation after 24 warm-up steps taken
            without an action.
        """
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.state = np.zeros(self.observation_space.shape)
        self._last_rewards = []

        # Track generation can fail; retry until it succeeds.
        while True:
            success = self._create_track()
            if success:
                break
            if self.verbose == 1:
                print(
                    "retry to generate track (normal if there are not many instances of this message)"
                )
        self.car = Car(self.world, *self.track[0][1:4])

        # there are 20 frames of noise at the beginning (+ 4 frames per state)
        for _ in range(24):
            obs = self.step(None)[0]

        return obs

    def _update_state(self, new_frame):
        """Push ``new_frame`` into the frame stack as the newest frame.

        The frame overwrites the last channel (the oldest frame) and the
        channels are then permuted with ``_update_index`` so that it ends up
        in channel 0.
        """
        self.state[:, :, -1] = new_frame
        self.state = self.state[:, :, self._update_index]

    def _transform_action(self, action):
        """Map a discrete action index to a ``[steer, gas, brake]`` list.

        Values that match none of the five indices (including ``None``) are
        returned unchanged.
        """
        if action == 0:
            action = [0, 0, 0.0]  # Nothing
        elif action == 1:
            action = [-1, 0, 0.0]  # Left
        elif action == 2:
            action = [+1, 0, 0.0]  # Right
        elif action == 3:
            action = [0, +1, 0.0]  # Accelerate
        elif action == 4:
            action = [0, 0, 0.8]  # Brake
        return action

    def step(self, action):
        """Advance the simulation by one frame.

        Args:
            action: a discrete action index, or ``None`` to advance without
                applying controls and without reward/termination checks (used
                by :meth:`reset` for the warm-up frames).

        Returns:
            ``(state, step_reward, done, {})``.  ``step_reward`` is -100 when
            the episode is aborted because of a failure condition.
        """
        action = self._transform_action(action)

        if action is not None:
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        self.car.step(1.0 / FPS)
        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
        self.t += 1.0 / FPS

        self._update_state(self.render("state_pixels"))

        step_reward = 0
        done = False
        fail = False
        if action is not None:  # First step without action, called from reset()
            # Constant per-frame penalty.  Fuel is intentionally not counted:
            # the goal is a faster car, not an economical one.
            self.reward -= 0.1
            self.car.fuel_spent = 0.0
            step_reward = self.reward - self.prev_reward
            self.prev_reward = self.reward
            x, y = self.car.hull.position
            if self.tile_visited_count == len(self.track):
                # Track done
                done = True
            elif abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
                # Car out of playfield
                if self.verbose == 1:
                    print("Killed because out of playing field")
                fail = True
            elif self.reward > 1000 or self.reward < -200:
                # If too good or too bad
                if self.verbose == 1:
                    print("Killed because of too low or too high reward")
                fail = True
            elif len(self._last_rewards) == self._last_rewards_size and max(
                    self._last_rewards) <= 0:
                # Early abort when no points were gained recently
                if self.verbose == 1:
                    print("Killed because of no recent progress")
                fail = True

        if fail:
            done = True
            step_reward = -100

        # Sliding window of the most recent step rewards.
        self._last_rewards.append(step_reward)
        if len(self._last_rewards) > self._last_rewards_size:
            self._last_rewards.pop(0)

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        """Draw the current scene.

        Args:
            mode: ``'human'`` draws to the window, ``'rgb_array'`` returns a
                ``VIDEO_H x VIDEO_W`` colour image and ``'state_pixels'``
                returns a ``STATE_H x STATE_W`` grayscale image.

        Returns:
            ``None`` if :meth:`reset` has not been called yet, the viewer's
            ``isopen`` flag in ``'human'`` mode, a ``(H, W, 3)`` uint8 array in
            ``'rgb_array'`` mode, or a ``(H, W)`` float array of luminance
            values in ``'state_pixels'`` mode.
        """
        assert mode in ['human', 'state_pixels', 'rgb_array']
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.score_label = pyglet.text.Label('0000',
                                                 font_size=36,
                                                 x=20,
                                                 y=WINDOW_H * 2.5 / 40.00,
                                                 anchor_x='left',
                                                 anchor_y='center',
                                                 color=(255, 255, 255, 255))
            self.transform = rendering.Transform()

        if "t" not in self.__dict__:
            return  # reset() not called yet

        # Animate zoom during the first second.
        zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(
            self.t, 1)
        scroll_x = self.car.hull.position[0]
        scroll_y = self.car.hull.position[1]
        angle = -self.car.hull.angle
        vel = self.car.hull.linearVelocity
        # Once the car is moving, orient the view along its velocity.
        if np.linalg.norm(vel) > 0.5:
            angle = math.atan2(vel[0], vel[1])
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(
            WINDOW_W / 2 - (scroll_x * zoom * math.cos(angle) -
                            scroll_y * zoom * math.sin(angle)),
            WINDOW_H / 4 - (scroll_x * zoom * math.sin(angle) +
                            scroll_y * zoom * math.cos(angle)))
        self.transform.set_rotation(angle)

        self.car.draw(self.viewer, mode != "state_pixels")

        win = self.viewer.window
        win.switch_to()
        win.dispatch_events()
        win.clear()

        t = self.transform
        if mode == 'rgb_array':
            VP_W = VIDEO_W
            VP_H = VIDEO_H
        elif mode == 'state_pixels':
            VP_W = STATE_W
            VP_H = STATE_H
        else:
            # Scale by the backing-store factor when the context exposes a
            # '_nscontext' attribute.
            pixel_scale = 1
            if hasattr(win.context, '_nscontext'):
                pixel_scale = win.context._nscontext.view().backingScaleFactor(
                )  # pylint: disable=protected-access
            VP_W = int(pixel_scale * WINDOW_W)
            VP_H = int(pixel_scale * WINDOW_H)

        gl.glViewport(0, 0, VP_W, VP_H)
        t.enable()
        self.render_road()
        for geom in self.viewer.onetime_geoms:
            geom.render()
        self.viewer.onetime_geoms = []
        t.disable()
        # Indicators are intentionally not shown in this variant.

        if mode == 'human':
            win.flip()
            return self.viewer.isopen

        image_data = pyglet.image.get_buffer_manager().get_color_buffer(
        ).get_image_data()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring;
        # it yields a read-only view of the pixel buffer.
        arr = np.frombuffer(image_data.get_data(), dtype=np.uint8)
        arr = arr.reshape(VP_H, VP_W, 4)
        # Flip the rows and drop the alpha channel.
        arr = arr[::-1, :, 0:3]

        if mode == 'state_pixels':
            # Convert to grayscale
            return np.dot(arr, [0.299, 0.587, 0.114])
        # Copy so callers get a writable array, as np.fromstring provided.
        return np.ascontiguousarray(arr)

    def close(self):
        """Close the viewer window, if one was opened."""
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        """Draw the grass background, its checker pattern and all road quads."""
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(0.4, 0.8, 0.4, 1.0)
        gl.glVertex3f(-PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, +PLAYFIELD, 0)
        gl.glVertex3f(+PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        gl.glColor4f(0.4, 0.9, 0.4, 1.0)
        k = PLAYFIELD / 20.0
        for x in range(-20, 20, 2):
            for y in range(-20, 20, 2):
                gl.glVertex3f(k * x + k, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + 0, 0)
                gl.glVertex3f(k * x + 0, k * y + k, 0)
                gl.glVertex3f(k * x + k, k * y + k, 0)
        for poly, color in self.road_poly:
            gl.glColor4f(color[0], color[1], color[2], 1)
            for p in poly:
                gl.glVertex3f(p[0], p[1], 0)
        gl.glEnd()

    def render_indicators(self, W, H):
        """Draw the speed / wheel / steering bars and the score label.

        Not called by :meth:`render` in this class.

        Args:
            W: width of the drawing area.
            H: height of the drawing area.
        """
        gl.glBegin(gl.GL_QUADS)
        s = W / 40.0
        h = H / 40.0
        gl.glColor4f(0, 0, 0, 1)
        gl.glVertex3f(W, 0, 0)
        gl.glVertex3f(W, 5 * h, 0)
        gl.glVertex3f(0, 5 * h, 0)
        gl.glVertex3f(0, 0, 0)

        def vertical_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h + h * val, 0)
            gl.glVertex3f((place + 1) * s, h, 0)
            gl.glVertex3f((place + 0) * s, h, 0)

        def horiz_ind(place, val, color):
            gl.glColor4f(color[0], color[1], color[2], 1)
            gl.glVertex3f((place + 0) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 4 * h, 0)
            gl.glVertex3f((place + val) * s, 2 * h, 0)
            gl.glVertex3f((place + 0) * s, 2 * h, 0)

        true_speed = np.sqrt(
            np.square(self.car.hull.linearVelocity[0]) +
            np.square(self.car.hull.linearVelocity[1]))
        vertical_ind(5, 0.02 * true_speed, (1, 1, 1))
        vertical_ind(7, 0.01 * self.car.wheels[0].omega,
                     (0.0, 0, 1))  # ABS sensors
        vertical_ind(8, 0.01 * self.car.wheels[1].omega, (0.0, 0, 1))
        vertical_ind(9, 0.01 * self.car.wheels[2].omega, (0.2, 0, 1))
        vertical_ind(10, 0.01 * self.car.wheels[3].omega, (0.2, 0, 1))
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle, (0, 1, 0))
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity, (1, 0, 0))
        gl.glEnd()
        self.score_label.text = "%04i" % self.reward
        self.score_label.draw()