def reset(self): self._destroy() self.reward = 0.0 self.prev_reward = 0.0 self.tile_visited_count = 0 self.t = 0.0 self.road_poly = [] self.human_render = False self.moved_distance.clear() while True: success = self._create_track() if success: break print("retry to generate track (normal if there are not many of this messages)") if self.num_bots: # Generate Bot Cars: self.bot_cars = [] self.bot_targets = [] init_coord = [(-np.pi/2, -PLAYFIELD+15, -ROAD_WIDTH/2), (0, ROAD_WIDTH/2, -PLAYFIELD+20), (np.pi/2, PLAYFIELD-30, ROAD_WIDTH/2), (np.pi, -ROAD_WIDTH/2, PLAYFIELD-15)] # init_colors = [(0.8, 0.4, 1), (1, 0.5, 0.1), (0.1, 1, 0.1), (0.2, 0.8, 1)] # trajectory = ['38', '52', '74', '96'] # self.bot_targets.extend([t[0] for t in trajectory]) for i in range(self.num_bots): if self.start_file: target, new_coord = self.start_file_position(forward_shift=25, number=i+1) else: target, new_coord = self.random_position(forward_shift=25) self.bot_targets.append(target[0]) car = DummyCar(self.world, new_coord, color=None, bot=True) # j = 2*i+4 if 2*i+4 < 9 else 2 car.hull.path = target #f"{2*i+3}{j}" car.userData = self.car self.bot_cars.append(car) # Generate Agent: if not self.agent: init_coord = (0, PLAYFIELD+2, PLAYFIELD) target = np.random.choice(list(PATH.keys())) else: if self.start_file: target, init_coord = self.start_file_position(forward_shift=25, bot=False) else: target, init_coord = self.random_position(forward_shift=25, bot=False) # target, init_coord = '38', (-np.pi/2, -PLAYFIELD+15, -ROAD_WIDTH/2+2) penalty_sections = {2, 3, 4, 5, 6, 7, 8, 9} - set(map(int, target)) self.car = DummyCar(self.world, init_coord, penalty_sections, color=(0,0,0)) self.car.hull.path = target self.car.userData = self.car self.moved_distance.append([self.car.hull.position.x, self.car.hull.position.y]) if self.write: self._to_file() self._create_tiles() self._create_target() return self.step(None)[0]
def step(self, action): # transform action space from discrete to continuous if action == 0: action = [0, 0, 0] elif action == 1: action = [-1, 0, 0] elif action == 2: action = [1, 0, 0] elif action == 3: action = [0, 1, 0] elif action == 4: action = [0, 0, 1] # self.car.go_to_target(CarPath) if action is not None: self.car.steer(action[0]) self.car.gas(action[1]) self.car.brake(action[2]) if self.num_bots: prev_stop_values = [] # keep values of bot_cars for stop on cross ans restore them before exiting: first_cross = self.bot_cars[np.argmin(list(x.hull.cross_time for x in self.bot_cars))] min_cross = np.min(list(x.hull.cross_time for x in self.bot_cars)) active_path = set(x.hull.path for x in self.bot_cars if x.hull.cross_time != float('inf')) for i, car in enumerate(self.bot_cars): prev_stop_values.append(car.hull.stop) if car.hull.cross_time != float('inf') and car.hull.cross_time > min_cross: if len(INTERSECT[car.hull.path] & active_path) != 0: car.hull.stop = True if car.hull.stop: car.brake(0.8) else: car.go_to_target(PATH[car.hull.path], PATH_cKDTree[car.hull.path]) # Check if car is outside of field (close to target) # and then change it position if car.close_to_target(PATH[car.hull.path][-1]): self.bot_targets[i] = '0' if self.start_file: target, new_coord = self.start_file_position(number=i+1) else: target, new_coord = self.random_position() # new_color = np.random.rand(3) #car.hull.color new_color = [0, 0, 0] #car.hull.color # change color new_car = DummyCar(self.world, new_coord, color=new_color, bot=True) new_car.hull.path = target new_car.userData = new_car self.bot_cars[i] = new_car self.bot_targets[i] = target[0] car.step(1.0/FPS) # Returning previous values of stops: for i, car in enumerate(self.bot_cars): car.hull.stop = prev_stop_values[i] self.car.step(1.0/FPS) self.world.Step(1.0/FPS, 6*30, 2*30) self.moved_distance.append([self.car.hull.position.x, self.car.hull.position.y]) self.t += 1.0/FPS if self.write: self._to_file() self.state = self.render("state_pixels") # collision # basically i found each bos2d body in self.car and for each put listener in userData.collision: # if self.car.hull.collision: # print('Collision') # # if self.car.hull.penalty: # print('Penalty') # Reward: step_reward = 0 done = False if self.agent: if action is not None: # First step without action, called from reset() # self.reward -= 0.01 step_reward = self.reward - self.prev_reward self.prev_reward = self.reward x, y = self.car.hull.position if abs(x) > PLAYFIELD+5 or abs(y) > PLAYFIELD+5: done = True step_reward += -10 if self.car_goal.userData.finish: done = True step_reward += 10 if self.car.hull.collision: done = True step_reward += -10 if np.any([w.collision for w in self.car.wheels]): done = True step_reward += -10 # if self.car.hull.penalty: # done = True # step_reward += -0.1 # if np.linalg.norm(self.car.hull.linearVelocity) < 1: # step_reward += -0.1 if len(self.moved_distance) == self.moved_distance.maxlen: prev_pos = np.array(self.moved_distance[0]) curr = np.array(self.moved_distance[-1]) if np.linalg.norm(prev_pos - curr) < 1: done = True step_reward += -15 if self.training_epoch: if done: with open("training_positions.csv", 'a') as fin: fin.write(','.join(list(map(str, [self.training_epoch, self.car.hull.angle, self.car.hull.position.x, self.car.hull.position.y])))) fin.write('\n') return self.state, step_reward, done, {}
class CarRacingDiscrete(gym.Env, EzPickle): metadata = { 'render.modes': ['human', 'rgb_array', 'state_pixels'], 'video.frames_per_second' : FPS } def __init__(self, agent = True, num_bots = 1, track_form = 'X', \ write = False, data_path = 'car_racing_positions.csv', \ start_file = True, training_epoch = False): EzPickle.__init__(self) self.seed() self.contactListener_keepref = MyContactListener(self) self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref) self.viewer = None self.invisible_state_window = None self.invisible_video_window = None self.road = None self.agent = agent self.car = None self.bot_cars = None self.reward = 0.0 self.prev_reward = 0.0 self.track_form = track_form self.data_path = data_path self.write = write self.training_epoch = training_epoch if write: car_title = ['car_angle', 'car_pos_x', 'car_pos_y'] bots_title = [] for i in range(num_bots): bots_title.extend([f'car_bot{i+1}_angle', f'car_bot{i+1}_pos_x', f'car_bot{i+1}_pos_y']) with open(data_path, 'w') as f: f.write(','.join(car_title + bots_title)) f.write('\n') # check the target position: self.moved_distance = deque(maxlen=1000) self.target = (0, 0) self.num_bots = num_bots self.start_file = start_file if start_file: with open("start_file.csv", 'r') as f: lines = f.readlines() self.start_positions = lines[15].strip().split(",") self.num_bots = len(self.start_positions) - 1 self.action_space = spaces.Discrete(5) # nothing, left, right, gas, break self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8) def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) return [seed] def _destroy(self): if not self.road: return for t in self.tiles: self.world.DestroyBody(t) for t in self.road: self.world.DestroyBody(t) self.road = [] self.tile = [] if self.agent: self.car.destroy() self.world.DestroyBody(self.car_goal) if self.num_bots: for car in self.bot_cars: car.destroy() def _create_track(self): # Creating Road Sections: # See notes: road_s1 = [(-ROAD_WIDTH, ROAD_WIDTH), (-ROAD_WIDTH, -ROAD_WIDTH), (ROAD_WIDTH, -ROAD_WIDTH), (ROAD_WIDTH, ROAD_WIDTH)] road_s2 = [(-PLAYFIELD, ROAD_WIDTH), (-PLAYFIELD, 0), (-ROAD_WIDTH, 0), (-ROAD_WIDTH, ROAD_WIDTH)] road_s3 = [(-PLAYFIELD, 0), (-PLAYFIELD, -ROAD_WIDTH), (-ROAD_WIDTH, -ROAD_WIDTH), (-ROAD_WIDTH, 0)] road_s4 = [(-ROAD_WIDTH, -ROAD_WIDTH), (-ROAD_WIDTH, -PLAYFIELD), (0, -PLAYFIELD), (0, -ROAD_WIDTH)] road_s5 = [(0, -ROAD_WIDTH), (0, -PLAYFIELD), (ROAD_WIDTH, -PLAYFIELD), (ROAD_WIDTH, -ROAD_WIDTH)] road_s6 = [(ROAD_WIDTH, 0), (ROAD_WIDTH, -ROAD_WIDTH), (PLAYFIELD, -ROAD_WIDTH), (PLAYFIELD, 0)] road_s7 = [(ROAD_WIDTH, ROAD_WIDTH), (ROAD_WIDTH, 0), (PLAYFIELD, 0), (PLAYFIELD, ROAD_WIDTH)] road_s8 = [(0, PLAYFIELD), (0, ROAD_WIDTH), (ROAD_WIDTH, ROAD_WIDTH), (ROAD_WIDTH, PLAYFIELD)] road_s9 = [(-ROAD_WIDTH, PLAYFIELD), (-ROAD_WIDTH, ROAD_WIDTH), (0, ROAD_WIDTH), (0, PLAYFIELD)] # Creating body of roads: self.road_poly = road_s1 + road_s2 + road_s3 + road_s4 + road_s5 + road_s6 + road_s7 if self.track_form == 'X': self.road_poly += road_s8 + road_s9 self.road = [] # Creating a static object - road with names for specific section + i need to define how to listen to it: for i in range(0, len(self.road_poly), 4): r = self.world.CreateStaticBody( fixtures=fixtureDef(shape=polygonShape(vertices=self.road_poly[i:i+4]), isSensor=True)) r.road_section = int(i/4)+1 r.name = 'road' r.userData = r self.road.append(r) # Creating sidewalks: sidewalk_h_nw = [(-PLAYFIELD, ROAD_WIDTH+SIDE_WALK), (-PLAYFIELD, ROAD_WIDTH), (-ROAD_WIDTH-SIDE_WALK*2, ROAD_WIDTH), (-ROAD_WIDTH-SIDE_WALK*2, ROAD_WIDTH+SIDE_WALK)] sidewalk_h_sw = [(x, y-2*ROAD_WIDTH-SIDE_WALK) for x, y in sidewalk_h_nw] sidewalk_h_ne = [(x+PLAYFIELD+2*SIDE_WALK+ROAD_WIDTH, y) for x, y in sidewalk_h_nw] sidewalk_h_se = [(x, y-2*ROAD_WIDTH-SIDE_WALK) for x, y in sidewalk_h_ne] sidewalk_v_nw = [(-ROAD_WIDTH-SIDE_WALK, PLAYFIELD), (-ROAD_WIDTH-SIDE_WALK, ROAD_WIDTH+SIDE_WALK*2), (-ROAD_WIDTH, ROAD_WIDTH+SIDE_WALK*2), (-ROAD_WIDTH, PLAYFIELD)] sidewalk_v_sw = [(x, y-PLAYFIELD-2*SIDE_WALK-ROAD_WIDTH) for x, y in sidewalk_v_nw] sidewalk_v_ne = [(x+2*ROAD_WIDTH+SIDE_WALK, y) for x, y in sidewalk_v_nw] sidewalk_v_se = [(x+2*ROAD_WIDTH+SIDE_WALK, y) for x, y in sidewalk_v_sw] # sidewalk_c_nw = [(-ROAD_WIDTH-2*SIDE_WALK+np.cos(rad)*1*SIDE_WALK, ROAD_WIDTH+2*SIDE_WALK+np.sin(rad)*1*SIDE_WALK) # for rad in np.linspace(-np.pi/2, 0, 12)] sidewalk_c_nw = sidewalk_v_nw[2:0:-1] + sidewalk_h_nw[3:1:-1] + [(-ROAD_WIDTH, ROAD_WIDTH)] sidewalk_c_ne = sidewalk_h_ne[1::-1] + sidewalk_v_ne[2:0:-1] + [(ROAD_WIDTH, ROAD_WIDTH)] sidewalk_c_sw = sidewalk_h_sw[3:1:-1] + sidewalk_v_sw[::3] + [(-ROAD_WIDTH, -ROAD_WIDTH)] sidewalk_c_se = sidewalk_v_se[::3] + sidewalk_h_se[1::-1] + [(ROAD_WIDTH, -ROAD_WIDTH)] self.all_sidewalks = [sidewalk_h_nw, sidewalk_h_ne, sidewalk_h_se, sidewalk_h_sw, sidewalk_v_nw, sidewalk_v_ne, sidewalk_v_se, sidewalk_v_sw, sidewalk_c_nw, sidewalk_c_ne, sidewalk_c_se, sidewalk_c_sw] #Now let's see the static world: sidewalk = self.world.CreateStaticBody( fixtures = [fixtureDef(shape=polygonShape(vertices=sw), isSensor=True) for sw in self.all_sidewalks]) sidewalk.name = 'sidewalk' sidewalk.userData = sidewalk # remove=============================================================== # w = self.world.CreateStaticBody(fixtures = fixtureDef(shape=polygonShape(box=(10,10, (20,20), 0)))) # w.name = 'car' # w.userData = w return True def random_position(self, forward_shift=0, bot=True, exclude=None): # creating list to diminish number of trajectories to choose from: target_set = set(PATH.keys()) if exclude is None else set(PATH.keys()) - exclude if len(target_set) == 0: print("No more places where to put car! Consider to decrease the number.") target = np.random.choice(list(target_set)) if exclude is None: exclude = {target} else: exclude.add(target) # if some cars on the same trajectory add distance between them space = 6*self.bot_targets.count(target[0]) - 3 - forward_shift if target[0] == '3': new_position = (-np.pi/2, -PLAYFIELD-space, -ROAD_WIDTH/2) if target[0] == '5': new_position = (0, ROAD_WIDTH/2, -PLAYFIELD-space) if target[0] == '7': new_position = (np.pi/2, PLAYFIELD+space, ROAD_WIDTH/2) if target[0] == '9': new_position = (np.pi, -ROAD_WIDTH/2, PLAYFIELD+space) if not bot: _, x, y = new_position if abs(x) > PLAYFIELD-5 or abs(y) > PLAYFIELD-5: return self.random_position(forward_shift, bot, exclude=exclude) for car in self.bot_cars: if car.close_to_target(new_position[1:], dist=3): # print(f"car target: {car.hull.path} and new_coord: {target}") # print(f"car position: {car.hull.position} and new_coord: {new_position[1:]}") # Choose new position: return self.random_position(forward_shift, bot, exclude=exclude) return target, new_position def start_file_position(self, forward_shift=0, bot=True, exclude=None, number=0): target = self.start_positions[number] # if some cars on the same trajectory add distance between them if self.num_bots: space = 6*self.bot_targets.count(target[0]) - 3 - forward_shift else: space = -3 - forward_shift if target[0] == '3': new_position = (-np.pi/2, -PLAYFIELD-space, -ROAD_WIDTH/2) if target[0] == '5': new_position = (0, ROAD_WIDTH/2, -PLAYFIELD-space) if target[0] == '7': new_position = (np.pi/2, PLAYFIELD+space, ROAD_WIDTH/2) if target[0] == '9': new_position = (np.pi, -ROAD_WIDTH/2, PLAYFIELD+space) if not bot: _, x, y = new_position if abs(x) > PLAYFIELD-5 or abs(y) > PLAYFIELD-5: return self.random_position(forward_shift, bot, exclude=exclude) if self.num_bots: for car in self.bot_cars: if car.close_to_target(new_position[1:], dist=3): # print(f"car target: {car.hull.path} and new_coord: {target}") # print(f"car position: {car.hull.position} and new_coord: {new_position[1:]}") # Choose new position: return self.start_file_position(forward_shift, bot, exclude=exclude) return target, new_position def _to_file(self): car_position = [self.car.hull.angle, self.car.hull.position.x, self.car.hull.position.y] bots_position = [] if self.num_bots: for car in self.bot_cars: bots_position.extend([car.hull.angle, car.hull.position.x, car.hull.position.y]) with open(self.data_path, 'a') as fout: fout.write(','.join(list(map(str, car_position + bots_position)))) fout.write('\n') def _create_tiles(self): self.tiles = [] self.tiles_poly = [] w, s = 1, 4 # width and step of tiles: if self.agent: sections = set(self.car.hull.path[1]) | {'5'} if '2' in sections: tiles_s2 = [[(-ROAD_WIDTH-(i+1)*w, 0), (-ROAD_WIDTH-i*w, 0), (-ROAD_WIDTH-i*w, ROAD_WIDTH), (-ROAD_WIDTH-(i+1)*w, ROAD_WIDTH)] \ for i in range(0, int(PLAYFIELD), s)] self.tiles_poly.extend(tiles_s2) if '3' in sections: tiles_s3 = [[(-ROAD_WIDTH-(i+1)*w, 0), (-ROAD_WIDTH-i*w, 0), (-ROAD_WIDTH-i*w, -ROAD_WIDTH), (-ROAD_WIDTH-(i+1)*w, -ROAD_WIDTH)] \ for i in range(0, int(PLAYFIELD), s)] self.tiles_poly.extend(tiles_s3) if '4' in sections: tiles_s4 = [[(0, -ROAD_WIDTH-i*w), (0, -ROAD_WIDTH-(i+1)*w), (-ROAD_WIDTH, -ROAD_WIDTH-(i+1)*w), (-ROAD_WIDTH, -ROAD_WIDTH-i*w)] \ for i in range(0, int(PLAYFIELD), s)] self.tiles_poly.extend(tiles_s4) if '5' in sections: tiles_s5 = [[(0, -ROAD_WIDTH-i*w), (0, -ROAD_WIDTH-(i+1)*w), (ROAD_WIDTH, -ROAD_WIDTH-(i+1)*w), (ROAD_WIDTH, -ROAD_WIDTH-i*w)] \ for i in range(0, int(PLAYFIELD), s)] self.tiles_poly.extend(tiles_s5) if '6' in sections: tiles_s6 = [[(ROAD_WIDTH+(i+1)*w, 0), (ROAD_WIDTH+i*w, 0), (ROAD_WIDTH+i*w, -ROAD_WIDTH), (ROAD_WIDTH+(i+1)*w, -ROAD_WIDTH)] \ for i in range(0, int(PLAYFIELD), s)] self.tiles_poly.extend(tiles_s6) if '7' in sections: tiles_s7 = [[(ROAD_WIDTH+(i+1)*w, 0), (ROAD_WIDTH+i*w, 0), (ROAD_WIDTH+i*w, ROAD_WIDTH), (ROAD_WIDTH+(i+1)*w, ROAD_WIDTH)] \ for i in range(0, int(PLAYFIELD), s)] self.tiles_poly.extend(tiles_s7) if '8' in sections: tiles_s8 = [[(0, ROAD_WIDTH+i*w), (0, ROAD_WIDTH+(i+1)*w), (ROAD_WIDTH, ROAD_WIDTH+(i+1)*w), (ROAD_WIDTH, ROAD_WIDTH+i*w)] \ for i in range(0, int(PLAYFIELD), s)] self.tiles_poly.extend(tiles_s8) if '9' in sections: tiles_s9 = [[(0, ROAD_WIDTH+i*w), (0, ROAD_WIDTH+(i+1)*w), (-ROAD_WIDTH, ROAD_WIDTH+(i+1)*w), (-ROAD_WIDTH, ROAD_WIDTH+i*w)] \ for i in range(0, int(PLAYFIELD), s)] self.tiles_poly.extend(tiles_s9) for tile in self.tiles_poly: t = self.world.CreateStaticBody( fixtures=fixtureDef(shape=polygonShape(vertices=tile), isSensor=True)) t.road_visited = False t.name = 'tile' t.userData = t self.tiles.append(t) def _create_target(self): target_vertices = { '2': [(-PLAYFIELD/2, ROAD_WIDTH), (-PLAYFIELD/2, 0), (-PLAYFIELD/2+3, 0), (-PLAYFIELD/2+3, ROAD_WIDTH)], '4': [(-ROAD_WIDTH, -PLAYFIELD/2), (0, -PLAYFIELD/2), (0, -PLAYFIELD/2+3), (-ROAD_WIDTH, -PLAYFIELD/2+3)], '6': [(PLAYFIELD/2, -ROAD_WIDTH), (PLAYFIELD/2, 0), (PLAYFIELD/2-3, 0), (PLAYFIELD/2-3, -ROAD_WIDTH)], '8': [(ROAD_WIDTH, PLAYFIELD/2), (0, PLAYFIELD/2), (0, PLAYFIELD/2-3), (ROAD_WIDTH, PLAYFIELD/2-3)] } if self.agent: goal = self.car.hull.path[1] self.car_goal_poly = target_vertices[goal] g = self.world.CreateStaticBody( fixtures=fixtureDef(shape=polygonShape(vertices=self.car_goal_poly), isSensor=True)) g.finish = False g.name = 'goal' g.userData = g self.car_goal = g def reset(self): self._destroy() self.reward = 0.0 self.prev_reward = 0.0 self.tile_visited_count = 0 self.t = 0.0 self.road_poly = [] self.human_render = False self.moved_distance.clear() while True: success = self._create_track() if success: break print("retry to generate track (normal if there are not many of this messages)") if self.num_bots: # Generate Bot Cars: self.bot_cars = [] self.bot_targets = [] init_coord = [(-np.pi/2, -PLAYFIELD+15, -ROAD_WIDTH/2), (0, ROAD_WIDTH/2, -PLAYFIELD+20), (np.pi/2, PLAYFIELD-30, ROAD_WIDTH/2), (np.pi, -ROAD_WIDTH/2, PLAYFIELD-15)] # init_colors = [(0.8, 0.4, 1), (1, 0.5, 0.1), (0.1, 1, 0.1), (0.2, 0.8, 1)] # trajectory = ['38', '52', '74', '96'] # self.bot_targets.extend([t[0] for t in trajectory]) for i in range(self.num_bots): if self.start_file: target, new_coord = self.start_file_position(forward_shift=25, number=i+1) else: target, new_coord = self.random_position(forward_shift=25) self.bot_targets.append(target[0]) car = DummyCar(self.world, new_coord, color=None, bot=True) # j = 2*i+4 if 2*i+4 < 9 else 2 car.hull.path = target #f"{2*i+3}{j}" car.userData = self.car self.bot_cars.append(car) # Generate Agent: if not self.agent: init_coord = (0, PLAYFIELD+2, PLAYFIELD) target = np.random.choice(list(PATH.keys())) else: if self.start_file: target, init_coord = self.start_file_position(forward_shift=25, bot=False) else: target, init_coord = self.random_position(forward_shift=25, bot=False) # target, init_coord = '38', (-np.pi/2, -PLAYFIELD+15, -ROAD_WIDTH/2+2) penalty_sections = {2, 3, 4, 5, 6, 7, 8, 9} - set(map(int, target)) self.car = DummyCar(self.world, init_coord, penalty_sections, color=(0,0,0)) self.car.hull.path = target self.car.userData = self.car self.moved_distance.append([self.car.hull.position.x, self.car.hull.position.y]) if self.write: self._to_file() self._create_tiles() self._create_target() return self.step(None)[0] def step(self, action): # transform action space from discrete to continuous if action == 0: action = [0, 0, 0] elif action == 1: action = [-1, 0, 0] elif action == 2: action = [1, 0, 0] elif action == 3: action = [0, 1, 0] elif action == 4: action = [0, 0, 1] # self.car.go_to_target(CarPath) if action is not None: self.car.steer(action[0]) self.car.gas(action[1]) self.car.brake(action[2]) if self.num_bots: prev_stop_values = [] # keep values of bot_cars for stop on cross ans restore them before exiting: first_cross = self.bot_cars[np.argmin(list(x.hull.cross_time for x in self.bot_cars))] min_cross = np.min(list(x.hull.cross_time for x in self.bot_cars)) active_path = set(x.hull.path for x in self.bot_cars if x.hull.cross_time != float('inf')) for i, car in enumerate(self.bot_cars): prev_stop_values.append(car.hull.stop) if car.hull.cross_time != float('inf') and car.hull.cross_time > min_cross: if len(INTERSECT[car.hull.path] & active_path) != 0: car.hull.stop = True if car.hull.stop: car.brake(0.8) else: car.go_to_target(PATH[car.hull.path], PATH_cKDTree[car.hull.path]) # Check if car is outside of field (close to target) # and then change it position if car.close_to_target(PATH[car.hull.path][-1]): self.bot_targets[i] = '0' if self.start_file: target, new_coord = self.start_file_position(number=i+1) else: target, new_coord = self.random_position() # new_color = np.random.rand(3) #car.hull.color new_color = [0, 0, 0] #car.hull.color # change color new_car = DummyCar(self.world, new_coord, color=new_color, bot=True) new_car.hull.path = target new_car.userData = new_car self.bot_cars[i] = new_car self.bot_targets[i] = target[0] car.step(1.0/FPS) # Returning previous values of stops: for i, car in enumerate(self.bot_cars): car.hull.stop = prev_stop_values[i] self.car.step(1.0/FPS) self.world.Step(1.0/FPS, 6*30, 2*30) self.moved_distance.append([self.car.hull.position.x, self.car.hull.position.y]) self.t += 1.0/FPS if self.write: self._to_file() self.state = self.render("state_pixels") # collision # basically i found each bos2d body in self.car and for each put listener in userData.collision: # if self.car.hull.collision: # print('Collision') # # if self.car.hull.penalty: # print('Penalty') # Reward: step_reward = 0 done = False if self.agent: if action is not None: # First step without action, called from reset() # self.reward -= 0.01 step_reward = self.reward - self.prev_reward self.prev_reward = self.reward x, y = self.car.hull.position if abs(x) > PLAYFIELD+5 or abs(y) > PLAYFIELD+5: done = True step_reward += -10 if self.car_goal.userData.finish: done = True step_reward += 10 if self.car.hull.collision: done = True step_reward += -10 if np.any([w.collision for w in self.car.wheels]): done = True step_reward += -10 # if self.car.hull.penalty: # done = True # step_reward += -0.1 # if np.linalg.norm(self.car.hull.linearVelocity) < 1: # step_reward += -0.1 if len(self.moved_distance) == self.moved_distance.maxlen: prev_pos = np.array(self.moved_distance[0]) curr = np.array(self.moved_distance[-1]) if np.linalg.norm(prev_pos - curr) < 1: done = True step_reward += -15 if self.training_epoch: if done: with open("training_positions.csv", 'a') as fin: fin.write(','.join(list(map(str, [self.training_epoch, self.car.hull.angle, self.car.hull.position.x, self.car.hull.position.y])))) fin.write('\n') return self.state, step_reward, done, {} def render(self, mode='human'): if self.viewer is None: from gym.envs.classic_control import rendering self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H) self.transform = rendering.Transform() if "t" not in self.__dict__: return # reset() not called yet zoom = ZOOM*SCALE #0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1) # Animate zoom first second zoom_state = ZOOM*SCALE*STATE_W/WINDOW_W zoom_video = ZOOM*SCALE*VIDEO_W/WINDOW_W scroll_x = 0 #self.car.hull.position[0] #0 scroll_y = 0 #self.car.hull.position[1] #-30 angle = 0 #-self.car.hull.angle #0 vel = 0 #self.car.hull.linearVelocity #0 self.transform.set_scale(zoom, zoom) self.transform.set_translation(WINDOW_W/2, WINDOW_H/2) # self.transform.set_translation( # WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)), # WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) ) # self.transform.set_rotation(angle) self.car.draw(self.viewer) if self.num_bots: for car in self.bot_cars: car.draw(self.viewer) arr = None win = self.viewer.window if mode != 'state_pixels' and mode != 'rgb_array': win.switch_to() win.dispatch_events() if mode=="rgb_array" or mode=="state_pixels": win.clear() t = self.transform self.transform.set_translation(0, 0) self.transform.set_scale(0.0167, 0.0167) if mode=='rgb_array': VP_W = VIDEO_W VP_H = VIDEO_H else: VP_W = WINDOW_W//2 #STATE_W VP_H = WINDOW_H//2 #STATE_H gl.glViewport(0, 0, VP_W, VP_H) t.enable() self.render_road() for geom in self.viewer.onetime_geoms: geom.render() t.disable() # self.render_indicators(WINDOW_W, WINDOW_H) # TODO: find why 2x needed, wtf image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data() arr = np.fromstring(image_data.data, dtype=np.uint8, sep='') arr = arr.reshape(VP_H, VP_W, 4) arr = arr[::-1, :, 0:3] if mode=="rgb_array" and not self.human_render: # agent can call or not call env.render() itself when recording video. win.flip() if mode=='human': self.human_render = True win.clear() t = self.transform gl.glViewport(0, 0, WINDOW_W, WINDOW_H) t.enable() self.render_road() for geom in self.viewer.onetime_geoms: geom.render() t.disable() # self.render_indicators(WINDOW_W, WINDOW_H) win.flip() self.viewer.onetime_geoms = [] return arr def close(self): if self.viewer is not None: self.viewer.close() self.viewer = None def render_road(self): # Painting entire field in white: gl.glBegin(gl.GL_QUADS) gl.glColor4f(1, 1, 1, 1.0) #gl.glColor4f(0, 0.5, 0, 1.0) #gl.glColor4f(0.4, 0.8, 0.4, 1.0) # change color gl.glVertex3f(-PLAYFIELD, PLAYFIELD, 0) gl.glVertex3f(PLAYFIELD, PLAYFIELD, 0) gl.glVertex3f(PLAYFIELD, -PLAYFIELD, 0) gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0) # Drawing road: gl.glColor4f(*ROAD_COLOR, 1) for poly in self.road_poly: gl.glVertex3f(*poly, 0) gl.glEnd() # # Drawing tiles: # gl.glColor4f(0, 0, 0, 1) # for tile in self.tiles_poly: # gl.glBegin(gl.GL_POLYGON) # for t in tile: # gl.glVertex3f(*t, 0) # gl.glEnd() # Drawing a sidewalk: gl.glColor4f(0, 0, 0, 1) #gl.glColor4f(0.66, 0.66, 0.66, 1) # change color for sw in self.all_sidewalks: gl.glBegin(gl.GL_POLYGON) for v in sw: gl.glVertex3f(*v, 0) gl.glEnd() # Drawing road crossings: gl.glColor4f(0, 0, 0, 1) #gl.glColor4f(1, 1, 1, 1) # change color for cros in crossings: for temp in cros: gl.glBegin(gl.GL_QUADS) for v in temp: gl.glVertex3f(*v, 0) gl.glEnd() for line in cross_line: gl.glBegin(gl.GL_LINES) for v in line: gl.glVertex3f(*v, 0) gl.glEnd() # # Drawing a rock: # gl.glBegin(gl.GL_QUADS) # gl.glColor4f(0,0,0,1) # gl.glVertex3f(10, 30, 0) # gl.glVertex3f(10, 10, 0) # gl.glVertex3f(30, 10, 0) # gl.glVertex3f(30, 30, 0) # gl.glEnd() # Drawing target destination for car: gl.glBegin(gl.GL_QUADS) gl.glColor4f(*self.car.hull.color, 1) for v in self.car_goal_poly: gl.glVertex3f(*v, 0) gl.glEnd() # # Drawing car pathes: # gl.glPointSize(5) # for car in self.bot_cars: # gl.glBegin(gl.GL_POINTS) # gl.glColor4f(*car.hull.color, 1) # gl.glVertex3f(*car.target, 0) # gl.glEnd() # # gl.glBegin(gl.GL_LINES) # for v in PATH[car.hull.path]: # gl.glVertex3f(*v, 0) # gl.glEnd() def training_status(self, mode='human'): if self.viewer is None: from gym.envs.classic_control import rendering self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H) self.transform = rendering.Transform() if "t" not in self.__dict__: return # reset() not called yet zoom = ZOOM*SCALE #0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1) # Animate zoom first second zoom_state = ZOOM*SCALE*STATE_W/WINDOW_W zoom_video = ZOOM*SCALE*VIDEO_W/WINDOW_W scroll_x = 0 #self.car.hull.position[0] #0 scroll_y = 0 #self.car.hull.position[1] #-30 angle = 0 #-self.car.hull.angle #0 vel = 0 #self.car.hull.linearVelocity #0 self.transform.set_scale(zoom, zoom) self.transform.set_translation(WINDOW_W/2, WINDOW_H/2) # self.transform.set_translation( # WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)), # WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) ) # self.transform.set_rotation(angle) arr = None win = self.viewer.window if mode != 'state_pixels' and mode != 'rgb_array': win.switch_to() win.dispatch_events() if mode=="rgb_array" or mode=="state_pixels": win.clear() t = self.transform self.transform.set_translation(0, 0) self.transform.set_scale(0.0167, 0.0167) if mode=='rgb_array': VP_W = VIDEO_W VP_H = VIDEO_H else: VP_W = WINDOW_W//2 #STATE_W VP_H = WINDOW_H//2 #STATE_H gl.glViewport(0, 0, VP_W, VP_H) t.enable() self.render_road() for geom in self.viewer.onetime_geoms: geom.render() t.disable() # self.render_indicators(WINDOW_W, WINDOW_H) # TODO: find why 2x needed, wtf image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data() arr = np.fromstring(image_data.data, dtype=np.uint8, sep='') arr = arr.reshape(VP_H, VP_W, 4) arr = arr[::-1, :, 0:3] if mode=="rgb_array" and not self.human_render: # agent can call or not call env.render() itself when recording video. win.flip() if mode=='human': self.human_render = True win.clear() t = self.transform gl.glViewport(0, 0, WINDOW_W, WINDOW_H) t.enable() self.render_road() for geom in self.viewer.onetime_geoms: geom.render() with open("training_positions.csv", 'r') as fin: line_number = sum(1 for _ in fin) with open("training_positions.csv", 'r') as fin: gl.glPointSize(10) for i, line in enumerate(fin): epoch, angle, coord_x, coord_y = list(map(float, line.strip().split(","))) new_coord = (angle, coord_x, coord_y) gl.glBegin(gl.GL_POINTS) alpha = (i+1)/line_number gl.glColor4f(alpha, 0, 1-alpha, 0.8) gl.glVertex3f(coord_x, coord_y, 0) gl.glEnd() t.disable() # self.render_indicators(WINDOW_W, WINDOW_H) win.flip() # # Drawing a rock: # gl.glBegin(gl.GL_QUADS) # gl.glColor4f(0,0,0,1) # gl.glVertex3f(10, 30, 0) # gl.glVertex3f(10, 10, 0) # gl.glVertex3f(30, 10, 0) # gl.glVertex3f(30, 30, 0) # gl.glEnd() self.viewer.onetime_geoms = [] return arr