Exemple #1
0
    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False
        self.moved_distance.clear()

        while True:
            success = self._create_track()
            if success: break
            print("retry to generate track (normal if there are not many of this messages)")

        if self.num_bots:
            # Generate Bot Cars:
            self.bot_cars = []
            self.bot_targets = []
            init_coord = [(-np.pi/2, -PLAYFIELD+15, -ROAD_WIDTH/2),
                          (0, ROAD_WIDTH/2, -PLAYFIELD+20),
                          (np.pi/2, PLAYFIELD-30, ROAD_WIDTH/2),
                          (np.pi, -ROAD_WIDTH/2, PLAYFIELD-15)]

            # init_colors = [(0.8, 0.4, 1), (1, 0.5, 0.1), (0.1, 1, 0.1), (0.2, 0.8, 1)]
            # trajectory = ['38', '52', '74', '96']
            # self.bot_targets.extend([t[0] for t in trajectory])
            for i in range(self.num_bots):
                if self.start_file:
                    target, new_coord = self.start_file_position(forward_shift=25, number=i+1)
                else:
                    target, new_coord = self.random_position(forward_shift=25)
                self.bot_targets.append(target[0])
                car = DummyCar(self.world, new_coord, color=None, bot=True)
                # j = 2*i+4 if 2*i+4 < 9 else 2
                car.hull.path = target #f"{2*i+3}{j}"
                car.userData = self.car
                self.bot_cars.append(car)

        # Generate Agent:
        if not self.agent:
            init_coord = (0, PLAYFIELD+2, PLAYFIELD)
            target = np.random.choice(list(PATH.keys()))
        else:
            if self.start_file:
                target, init_coord = self.start_file_position(forward_shift=25, bot=False)
            else:
                target, init_coord = self.random_position(forward_shift=25, bot=False)
        # target, init_coord = '38', (-np.pi/2, -PLAYFIELD+15, -ROAD_WIDTH/2+2)
        penalty_sections = {2, 3, 4, 5, 6, 7, 8, 9} - set(map(int, target))
        self.car = DummyCar(self.world, init_coord, penalty_sections, color=(0,0,0))
        self.car.hull.path = target
        self.car.userData = self.car
        self.moved_distance.append([self.car.hull.position.x, self.car.hull.position.y])

        if self.write:
            self._to_file()

        self._create_tiles()
        self._create_target()

        return self.step(None)[0]
Exemple #2
0
    def step(self, action):
        # transform action space from discrete to continuous
        if action == 0:
            action = [0, 0, 0]
        elif action == 1:
            action = [-1, 0, 0]
        elif action == 2:
            action = [1, 0, 0]
        elif action == 3:
            action = [0, 1, 0]
        elif action == 4:
            action = [0, 0, 1]

        # self.car.go_to_target(CarPath)
        if action is not None:
            self.car.steer(action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        if self.num_bots:
            prev_stop_values = [] # keep values of bot_cars for stop on cross ans restore them before exiting:
            first_cross = self.bot_cars[np.argmin(list(x.hull.cross_time for x in self.bot_cars))]
            min_cross = np.min(list(x.hull.cross_time for x in self.bot_cars))
            active_path = set(x.hull.path for x in self.bot_cars if x.hull.cross_time != float('inf'))
            for i, car in enumerate(self.bot_cars):
                prev_stop_values.append(car.hull.stop)

                if car.hull.cross_time != float('inf') and car.hull.cross_time > min_cross:
                    if len(INTERSECT[car.hull.path] & active_path) != 0:
                        car.hull.stop = True

                if car.hull.stop:
                    car.brake(0.8)
                else:
                    car.go_to_target(PATH[car.hull.path], PATH_cKDTree[car.hull.path])
                    # Check if car is outside of field (close to target)
                    # and then change it position
                    if car.close_to_target(PATH[car.hull.path][-1]):
                        self.bot_targets[i] = '0'
                        if self.start_file:
                            target, new_coord = self.start_file_position(number=i+1)
                        else:
                            target, new_coord = self.random_position()
                        # new_color = np.random.rand(3) #car.hull.color
                        new_color = [0, 0, 0] #car.hull.color # change color
                        new_car = DummyCar(self.world, new_coord, color=new_color, bot=True)
                        new_car.hull.path = target
                        new_car.userData = new_car
                        self.bot_cars[i] = new_car
                        self.bot_targets[i] = target[0]

                car.step(1.0/FPS)

            # Returning previous values of stops:
            for i, car in enumerate(self.bot_cars):
                car.hull.stop = prev_stop_values[i]

        self.car.step(1.0/FPS)
        self.world.Step(1.0/FPS, 6*30, 2*30)
        self.moved_distance.append([self.car.hull.position.x, self.car.hull.position.y])
        self.t += 1.0/FPS

        if self.write:
            self._to_file()

        self.state = self.render("state_pixels")

        # collision
        # basically i found each bos2d body in self.car and for each put listener in userData.collision:
        # if self.car.hull.collision:
        #     print('Collision')
        #
        # if self.car.hull.penalty:
        #     print('Penalty')

        # Reward:
        step_reward = 0
        done = False
        if self.agent:
            if action is not None: # First step without action, called from reset()
                # self.reward -= 0.01
                step_reward = self.reward - self.prev_reward
                self.prev_reward = self.reward
                x, y = self.car.hull.position
                if abs(x) > PLAYFIELD+5 or abs(y) > PLAYFIELD+5:
                    done = True
                    step_reward += -10
                if self.car_goal.userData.finish:
                    done = True
                    step_reward += 10
                if self.car.hull.collision:
                    done = True
                    step_reward += -10
                if np.any([w.collision for w in self.car.wheels]):
                    done = True
                    step_reward += -10
                # if self.car.hull.penalty:
                #     done = True
                #     step_reward += -0.1
                # if np.linalg.norm(self.car.hull.linearVelocity) < 1:
                #     step_reward += -0.1
                if len(self.moved_distance) == self.moved_distance.maxlen:
                    prev_pos = np.array(self.moved_distance[0])
                    curr = np.array(self.moved_distance[-1])
                    if np.linalg.norm(prev_pos - curr) < 1:
                        done = True
                        step_reward += -15

        if self.training_epoch:
            if done:
                with open("training_positions.csv", 'a') as fin:
                    fin.write(','.join(list(map(str, [self.training_epoch,
                                        self.car.hull.angle,
                                        self.car.hull.position.x,
                                        self.car.hull.position.y]))))
                    fin.write('\n')

        return self.state, step_reward, done, {}
Exemple #3
0
class CarRacingDiscrete(gym.Env, EzPickle):
    metadata = {
        'render.modes': ['human', 'rgb_array', 'state_pixels'],
        'video.frames_per_second' : FPS
    }

    def __init__(self, agent = True, num_bots = 1, track_form = 'X', \
                 write = False, data_path = 'car_racing_positions.csv', \
                 start_file = True, training_epoch = False):
        EzPickle.__init__(self)
        self.seed()
        self.contactListener_keepref = MyContactListener(self)
        self.world = Box2D.b2World((0,0), contactListener=self.contactListener_keepref)
        self.viewer = None
        self.invisible_state_window = None
        self.invisible_video_window = None
        self.road = None
        self.agent = agent
        self.car = None
        self.bot_cars = None
        self.reward = 0.0
        self.prev_reward = 0.0
        self.track_form = track_form
        self.data_path = data_path
        self.write = write
        self.training_epoch = training_epoch

        if write:
            car_title = ['car_angle', 'car_pos_x', 'car_pos_y']
            bots_title = []
            for i in range(num_bots):
                bots_title.extend([f'car_bot{i+1}_angle', f'car_bot{i+1}_pos_x', f'car_bot{i+1}_pos_y'])
            with open(data_path, 'w') as f:
                f.write(','.join(car_title + bots_title))
                f.write('\n')

        # check the target position:
        self.moved_distance = deque(maxlen=1000)
        self.target = (0, 0)
        self.num_bots = num_bots

        self.start_file = start_file
        if start_file:
            with open("start_file.csv", 'r') as f:
                lines = f.readlines()
                self.start_positions = lines[15].strip().split(",")
                self.num_bots = len(self.start_positions) - 1

        self.action_space = spaces.Discrete(5)  # nothing, left, right, gas, break
        self.observation_space = spaces.Box(low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8)

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        if not self.road: return
        for t in self.tiles:
            self.world.DestroyBody(t)
        for t in self.road:
            self.world.DestroyBody(t)
        self.road = []
        self.tile = []
        if self.agent:
            self.car.destroy()
            self.world.DestroyBody(self.car_goal)
        if self.num_bots:
            for car in self.bot_cars:
                car.destroy()

    def _create_track(self):
        # Creating Road Sections:
        # See notes:
        road_s1 = [(-ROAD_WIDTH, ROAD_WIDTH), (-ROAD_WIDTH, -ROAD_WIDTH),
                   (ROAD_WIDTH, -ROAD_WIDTH), (ROAD_WIDTH, ROAD_WIDTH)]

        road_s2 = [(-PLAYFIELD, ROAD_WIDTH), (-PLAYFIELD, 0), (-ROAD_WIDTH, 0),
                   (-ROAD_WIDTH, ROAD_WIDTH)]
        road_s3 = [(-PLAYFIELD, 0), (-PLAYFIELD, -ROAD_WIDTH),
                   (-ROAD_WIDTH, -ROAD_WIDTH), (-ROAD_WIDTH, 0)]

        road_s4 = [(-ROAD_WIDTH, -ROAD_WIDTH), (-ROAD_WIDTH, -PLAYFIELD),
                   (0, -PLAYFIELD), (0, -ROAD_WIDTH)]
        road_s5 = [(0, -ROAD_WIDTH), (0, -PLAYFIELD), (ROAD_WIDTH, -PLAYFIELD),
                   (ROAD_WIDTH, -ROAD_WIDTH)]

        road_s6 = [(ROAD_WIDTH, 0), (ROAD_WIDTH, -ROAD_WIDTH),
                   (PLAYFIELD, -ROAD_WIDTH), (PLAYFIELD, 0)]
        road_s7 = [(ROAD_WIDTH, ROAD_WIDTH), (ROAD_WIDTH, 0), (PLAYFIELD, 0),
                   (PLAYFIELD, ROAD_WIDTH)]

        road_s8 = [(0, PLAYFIELD), (0, ROAD_WIDTH), (ROAD_WIDTH, ROAD_WIDTH),
                   (ROAD_WIDTH, PLAYFIELD)]
        road_s9 = [(-ROAD_WIDTH, PLAYFIELD), (-ROAD_WIDTH, ROAD_WIDTH),
                   (0, ROAD_WIDTH), (0, PLAYFIELD)]

        # Creating body of roads:
        self.road_poly = road_s1 + road_s2 + road_s3 + road_s4 + road_s5 + road_s6 + road_s7
        if self.track_form == 'X':
            self.road_poly += road_s8 + road_s9

        self.road = []

        # Creating a static object - road with names for specific section + i need to define how to listen to it:
        for i in range(0, len(self.road_poly), 4):
            r = self.world.CreateStaticBody(
                    fixtures=fixtureDef(shape=polygonShape(vertices=self.road_poly[i:i+4]), isSensor=True))
            r.road_section = int(i/4)+1
            r.name = 'road'
            r.userData = r
            self.road.append(r)

        # Creating sidewalks:
        sidewalk_h_nw = [(-PLAYFIELD, ROAD_WIDTH+SIDE_WALK),
                          (-PLAYFIELD, ROAD_WIDTH),
                          (-ROAD_WIDTH-SIDE_WALK*2, ROAD_WIDTH),
                          (-ROAD_WIDTH-SIDE_WALK*2, ROAD_WIDTH+SIDE_WALK)]
        sidewalk_h_sw = [(x, y-2*ROAD_WIDTH-SIDE_WALK) for x, y in sidewalk_h_nw]
        sidewalk_h_ne = [(x+PLAYFIELD+2*SIDE_WALK+ROAD_WIDTH, y) for x, y in sidewalk_h_nw]
        sidewalk_h_se = [(x, y-2*ROAD_WIDTH-SIDE_WALK) for x, y in sidewalk_h_ne]

        sidewalk_v_nw = [(-ROAD_WIDTH-SIDE_WALK, PLAYFIELD),
                         (-ROAD_WIDTH-SIDE_WALK, ROAD_WIDTH+SIDE_WALK*2),
                         (-ROAD_WIDTH, ROAD_WIDTH+SIDE_WALK*2),
                         (-ROAD_WIDTH, PLAYFIELD)]

        sidewalk_v_sw = [(x, y-PLAYFIELD-2*SIDE_WALK-ROAD_WIDTH) for x, y in sidewalk_v_nw]
        sidewalk_v_ne = [(x+2*ROAD_WIDTH+SIDE_WALK, y) for x, y in sidewalk_v_nw]
        sidewalk_v_se = [(x+2*ROAD_WIDTH+SIDE_WALK, y) for x, y in sidewalk_v_sw]

        # sidewalk_c_nw = [(-ROAD_WIDTH-2*SIDE_WALK+np.cos(rad)*1*SIDE_WALK, ROAD_WIDTH+2*SIDE_WALK+np.sin(rad)*1*SIDE_WALK)
        #             for rad in np.linspace(-np.pi/2, 0, 12)]
        sidewalk_c_nw = sidewalk_v_nw[2:0:-1] + sidewalk_h_nw[3:1:-1] + [(-ROAD_WIDTH, ROAD_WIDTH)]
        sidewalk_c_ne = sidewalk_h_ne[1::-1] + sidewalk_v_ne[2:0:-1] + [(ROAD_WIDTH, ROAD_WIDTH)]
        sidewalk_c_sw = sidewalk_h_sw[3:1:-1] + sidewalk_v_sw[::3] + [(-ROAD_WIDTH, -ROAD_WIDTH)]
        sidewalk_c_se = sidewalk_v_se[::3] + sidewalk_h_se[1::-1] + [(ROAD_WIDTH, -ROAD_WIDTH)]

        self.all_sidewalks = [sidewalk_h_nw, sidewalk_h_ne, sidewalk_h_se, sidewalk_h_sw,
                              sidewalk_v_nw, sidewalk_v_ne, sidewalk_v_se, sidewalk_v_sw,
                              sidewalk_c_nw, sidewalk_c_ne, sidewalk_c_se, sidewalk_c_sw]

        #Now let's see the static world:
        sidewalk = self.world.CreateStaticBody(
            fixtures = [fixtureDef(shape=polygonShape(vertices=sw), isSensor=True) for sw in self.all_sidewalks])
        sidewalk.name = 'sidewalk'
        sidewalk.userData = sidewalk

        # remove===============================================================
        # w = self.world.CreateStaticBody(fixtures = fixtureDef(shape=polygonShape(box=(10,10, (20,20), 0))))
        # w.name = 'car'
        # w.userData = w
        return True

    def random_position(self, forward_shift=0, bot=True, exclude=None):
        # creating list to diminish number of trajectories to choose from:
        target_set = set(PATH.keys()) if exclude is None else set(PATH.keys()) - exclude
        if len(target_set) == 0:
            print("No more places where to put car! Consider to decrease the number.")

        target = np.random.choice(list(target_set))
        if exclude is None:
            exclude = {target}
        else:
            exclude.add(target)

        # if some cars on the same trajectory add distance between them
        space = 6*self.bot_targets.count(target[0]) - 3 - forward_shift

        if target[0] == '3':
            new_position = (-np.pi/2, -PLAYFIELD-space, -ROAD_WIDTH/2)
        if target[0] == '5':
            new_position = (0, ROAD_WIDTH/2, -PLAYFIELD-space)
        if target[0] == '7':
            new_position = (np.pi/2, PLAYFIELD+space, ROAD_WIDTH/2)
        if target[0] == '9':
            new_position = (np.pi, -ROAD_WIDTH/2, PLAYFIELD+space)

        if not bot:
            _, x, y = new_position
            if abs(x) > PLAYFIELD-5 or abs(y) > PLAYFIELD-5:
                return self.random_position(forward_shift, bot, exclude=exclude)

        for car in self.bot_cars:
            if car.close_to_target(new_position[1:], dist=3):
                # print(f"car target: {car.hull.path} and new_coord: {target}")
                # print(f"car position: {car.hull.position} and new_coord: {new_position[1:]}")
                # Choose new position:
                return self.random_position(forward_shift, bot, exclude=exclude)

        return target, new_position

    def start_file_position(self, forward_shift=0, bot=True, exclude=None, number=0):

        target = self.start_positions[number]

        # if some cars on the same trajectory add distance between them
        if self.num_bots:
            space = 6*self.bot_targets.count(target[0]) - 3 - forward_shift
        else:
            space = -3 - forward_shift

        if target[0] == '3':
            new_position = (-np.pi/2, -PLAYFIELD-space, -ROAD_WIDTH/2)
        if target[0] == '5':
            new_position = (0, ROAD_WIDTH/2, -PLAYFIELD-space)
        if target[0] == '7':
            new_position = (np.pi/2, PLAYFIELD+space, ROAD_WIDTH/2)
        if target[0] == '9':
            new_position = (np.pi, -ROAD_WIDTH/2, PLAYFIELD+space)

        if not bot:
            _, x, y = new_position
            if abs(x) > PLAYFIELD-5 or abs(y) > PLAYFIELD-5:
                return self.random_position(forward_shift, bot, exclude=exclude)

        if self.num_bots:
            for car in self.bot_cars:
                if car.close_to_target(new_position[1:], dist=3):
                    # print(f"car target: {car.hull.path} and new_coord: {target}")
                    # print(f"car position: {car.hull.position} and new_coord: {new_position[1:]}")
                    # Choose new position:
                    return self.start_file_position(forward_shift, bot, exclude=exclude)

        return target, new_position

    def _to_file(self):
        car_position = [self.car.hull.angle,
                        self.car.hull.position.x,
                        self.car.hull.position.y]

        bots_position = []
        if self.num_bots:
            for car in self.bot_cars:
                bots_position.extend([car.hull.angle,
                                      car.hull.position.x,
                                      car.hull.position.y])

        with open(self.data_path, 'a') as fout:
            fout.write(','.join(list(map(str, car_position + bots_position))))
            fout.write('\n')

    def _create_tiles(self):
        self.tiles = []
        self.tiles_poly = []
        w, s = 1, 4 # width and step of tiles:
        if self.agent:
            sections = set(self.car.hull.path[1]) | {'5'}
            if '2' in sections:
                tiles_s2 = [[(-ROAD_WIDTH-(i+1)*w, 0), (-ROAD_WIDTH-i*w, 0), (-ROAD_WIDTH-i*w, ROAD_WIDTH), (-ROAD_WIDTH-(i+1)*w, ROAD_WIDTH)] \
                            for i in range(0, int(PLAYFIELD), s)]
                self.tiles_poly.extend(tiles_s2)
            if '3' in sections:
                tiles_s3 = [[(-ROAD_WIDTH-(i+1)*w, 0), (-ROAD_WIDTH-i*w, 0), (-ROAD_WIDTH-i*w, -ROAD_WIDTH), (-ROAD_WIDTH-(i+1)*w, -ROAD_WIDTH)] \
                            for i in range(0, int(PLAYFIELD), s)]
                self.tiles_poly.extend(tiles_s3)
            if '4' in sections:
                tiles_s4 = [[(0, -ROAD_WIDTH-i*w), (0, -ROAD_WIDTH-(i+1)*w), (-ROAD_WIDTH, -ROAD_WIDTH-(i+1)*w), (-ROAD_WIDTH, -ROAD_WIDTH-i*w)] \
                            for i in range(0, int(PLAYFIELD), s)]
                self.tiles_poly.extend(tiles_s4)
            if '5' in sections:
                tiles_s5 = [[(0, -ROAD_WIDTH-i*w), (0, -ROAD_WIDTH-(i+1)*w), (ROAD_WIDTH, -ROAD_WIDTH-(i+1)*w), (ROAD_WIDTH, -ROAD_WIDTH-i*w)] \
                            for i in range(0, int(PLAYFIELD), s)]
                self.tiles_poly.extend(tiles_s5)
            if '6' in sections:
                tiles_s6 = [[(ROAD_WIDTH+(i+1)*w, 0), (ROAD_WIDTH+i*w, 0), (ROAD_WIDTH+i*w, -ROAD_WIDTH), (ROAD_WIDTH+(i+1)*w, -ROAD_WIDTH)] \
                            for i in range(0, int(PLAYFIELD), s)]
                self.tiles_poly.extend(tiles_s6)
            if '7' in sections:
                tiles_s7 = [[(ROAD_WIDTH+(i+1)*w, 0), (ROAD_WIDTH+i*w, 0), (ROAD_WIDTH+i*w, ROAD_WIDTH), (ROAD_WIDTH+(i+1)*w, ROAD_WIDTH)] \
                            for i in range(0, int(PLAYFIELD), s)]
                self.tiles_poly.extend(tiles_s7)
            if '8' in sections:
                tiles_s8 = [[(0, ROAD_WIDTH+i*w), (0, ROAD_WIDTH+(i+1)*w), (ROAD_WIDTH, ROAD_WIDTH+(i+1)*w), (ROAD_WIDTH, ROAD_WIDTH+i*w)] \
                            for i in range(0, int(PLAYFIELD), s)]
                self.tiles_poly.extend(tiles_s8)
            if '9' in sections:
                tiles_s9 = [[(0, ROAD_WIDTH+i*w), (0, ROAD_WIDTH+(i+1)*w), (-ROAD_WIDTH, ROAD_WIDTH+(i+1)*w), (-ROAD_WIDTH, ROAD_WIDTH+i*w)] \
                            for i in range(0, int(PLAYFIELD), s)]
                self.tiles_poly.extend(tiles_s9)

            for tile in self.tiles_poly:
                t = self.world.CreateStaticBody(
                        fixtures=fixtureDef(shape=polygonShape(vertices=tile), isSensor=True))
                t.road_visited = False
                t.name = 'tile'
                t.userData = t
                self.tiles.append(t)

    def _create_target(self):
        target_vertices = {
            '2': [(-PLAYFIELD/2, ROAD_WIDTH), (-PLAYFIELD/2, 0), (-PLAYFIELD/2+3, 0), (-PLAYFIELD/2+3, ROAD_WIDTH)],
            '4': [(-ROAD_WIDTH, -PLAYFIELD/2), (0, -PLAYFIELD/2), (0, -PLAYFIELD/2+3), (-ROAD_WIDTH, -PLAYFIELD/2+3)],
            '6': [(PLAYFIELD/2, -ROAD_WIDTH), (PLAYFIELD/2, 0), (PLAYFIELD/2-3, 0), (PLAYFIELD/2-3, -ROAD_WIDTH)],
            '8': [(ROAD_WIDTH, PLAYFIELD/2), (0, PLAYFIELD/2), (0, PLAYFIELD/2-3), (ROAD_WIDTH, PLAYFIELD/2-3)]
        }
        if self.agent:
            goal = self.car.hull.path[1]
            self.car_goal_poly = target_vertices[goal]
            g = self.world.CreateStaticBody(
                    fixtures=fixtureDef(shape=polygonShape(vertices=self.car_goal_poly),
                                        isSensor=True))
            g.finish = False
            g.name = 'goal'
            g.userData = g
            self.car_goal = g

    def reset(self):
        self._destroy()
        self.reward = 0.0
        self.prev_reward = 0.0
        self.tile_visited_count = 0
        self.t = 0.0
        self.road_poly = []
        self.human_render = False
        self.moved_distance.clear()

        while True:
            success = self._create_track()
            if success: break
            print("retry to generate track (normal if there are not many of this messages)")

        if self.num_bots:
            # Generate Bot Cars:
            self.bot_cars = []
            self.bot_targets = []
            init_coord = [(-np.pi/2, -PLAYFIELD+15, -ROAD_WIDTH/2),
                          (0, ROAD_WIDTH/2, -PLAYFIELD+20),
                          (np.pi/2, PLAYFIELD-30, ROAD_WIDTH/2),
                          (np.pi, -ROAD_WIDTH/2, PLAYFIELD-15)]

            # init_colors = [(0.8, 0.4, 1), (1, 0.5, 0.1), (0.1, 1, 0.1), (0.2, 0.8, 1)]
            # trajectory = ['38', '52', '74', '96']
            # self.bot_targets.extend([t[0] for t in trajectory])
            for i in range(self.num_bots):
                if self.start_file:
                    target, new_coord = self.start_file_position(forward_shift=25, number=i+1)
                else:
                    target, new_coord = self.random_position(forward_shift=25)
                self.bot_targets.append(target[0])
                car = DummyCar(self.world, new_coord, color=None, bot=True)
                # j = 2*i+4 if 2*i+4 < 9 else 2
                car.hull.path = target #f"{2*i+3}{j}"
                car.userData = self.car
                self.bot_cars.append(car)

        # Generate Agent:
        if not self.agent:
            init_coord = (0, PLAYFIELD+2, PLAYFIELD)
            target = np.random.choice(list(PATH.keys()))
        else:
            if self.start_file:
                target, init_coord = self.start_file_position(forward_shift=25, bot=False)
            else:
                target, init_coord = self.random_position(forward_shift=25, bot=False)
        # target, init_coord = '38', (-np.pi/2, -PLAYFIELD+15, -ROAD_WIDTH/2+2)
        penalty_sections = {2, 3, 4, 5, 6, 7, 8, 9} - set(map(int, target))
        self.car = DummyCar(self.world, init_coord, penalty_sections, color=(0,0,0))
        self.car.hull.path = target
        self.car.userData = self.car
        self.moved_distance.append([self.car.hull.position.x, self.car.hull.position.y])

        if self.write:
            self._to_file()

        self._create_tiles()
        self._create_target()

        return self.step(None)[0]

    def step(self, action):
        # transform action space from discrete to continuous
        if action == 0:
            action = [0, 0, 0]
        elif action == 1:
            action = [-1, 0, 0]
        elif action == 2:
            action = [1, 0, 0]
        elif action == 3:
            action = [0, 1, 0]
        elif action == 4:
            action = [0, 0, 1]

        # self.car.go_to_target(CarPath)
        if action is not None:
            self.car.steer(action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])

        if self.num_bots:
            prev_stop_values = [] # keep values of bot_cars for stop on cross ans restore them before exiting:
            first_cross = self.bot_cars[np.argmin(list(x.hull.cross_time for x in self.bot_cars))]
            min_cross = np.min(list(x.hull.cross_time for x in self.bot_cars))
            active_path = set(x.hull.path for x in self.bot_cars if x.hull.cross_time != float('inf'))
            for i, car in enumerate(self.bot_cars):
                prev_stop_values.append(car.hull.stop)

                if car.hull.cross_time != float('inf') and car.hull.cross_time > min_cross:
                    if len(INTERSECT[car.hull.path] & active_path) != 0:
                        car.hull.stop = True

                if car.hull.stop:
                    car.brake(0.8)
                else:
                    car.go_to_target(PATH[car.hull.path], PATH_cKDTree[car.hull.path])
                    # Check if car is outside of field (close to target)
                    # and then change it position
                    if car.close_to_target(PATH[car.hull.path][-1]):
                        self.bot_targets[i] = '0'
                        if self.start_file:
                            target, new_coord = self.start_file_position(number=i+1)
                        else:
                            target, new_coord = self.random_position()
                        # new_color = np.random.rand(3) #car.hull.color
                        new_color = [0, 0, 0] #car.hull.color # change color
                        new_car = DummyCar(self.world, new_coord, color=new_color, bot=True)
                        new_car.hull.path = target
                        new_car.userData = new_car
                        self.bot_cars[i] = new_car
                        self.bot_targets[i] = target[0]

                car.step(1.0/FPS)

            # Returning previous values of stops:
            for i, car in enumerate(self.bot_cars):
                car.hull.stop = prev_stop_values[i]

        self.car.step(1.0/FPS)
        self.world.Step(1.0/FPS, 6*30, 2*30)
        self.moved_distance.append([self.car.hull.position.x, self.car.hull.position.y])
        self.t += 1.0/FPS

        if self.write:
            self._to_file()

        self.state = self.render("state_pixels")

        # collision
        # basically i found each bos2d body in self.car and for each put listener in userData.collision:
        # if self.car.hull.collision:
        #     print('Collision')
        #
        # if self.car.hull.penalty:
        #     print('Penalty')

        # Reward:
        step_reward = 0
        done = False
        if self.agent:
            if action is not None: # First step without action, called from reset()
                # self.reward -= 0.01
                step_reward = self.reward - self.prev_reward
                self.prev_reward = self.reward
                x, y = self.car.hull.position
                if abs(x) > PLAYFIELD+5 or abs(y) > PLAYFIELD+5:
                    done = True
                    step_reward += -10
                if self.car_goal.userData.finish:
                    done = True
                    step_reward += 10
                if self.car.hull.collision:
                    done = True
                    step_reward += -10
                if np.any([w.collision for w in self.car.wheels]):
                    done = True
                    step_reward += -10
                # if self.car.hull.penalty:
                #     done = True
                #     step_reward += -0.1
                # if np.linalg.norm(self.car.hull.linearVelocity) < 1:
                #     step_reward += -0.1
                if len(self.moved_distance) == self.moved_distance.maxlen:
                    prev_pos = np.array(self.moved_distance[0])
                    curr = np.array(self.moved_distance[-1])
                    if np.linalg.norm(prev_pos - curr) < 1:
                        done = True
                        step_reward += -15

        if self.training_epoch:
            if done:
                with open("training_positions.csv", 'a') as fin:
                    fin.write(','.join(list(map(str, [self.training_epoch,
                                        self.car.hull.angle,
                                        self.car.hull.position.x,
                                        self.car.hull.position.y]))))
                    fin.write('\n')

        return self.state, step_reward, done, {}

    def render(self, mode='human'):
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.transform = rendering.Transform()

        if "t" not in self.__dict__: return  # reset() not called yet

        zoom = ZOOM*SCALE #0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)   # Animate zoom first second
        zoom_state  = ZOOM*SCALE*STATE_W/WINDOW_W
        zoom_video  = ZOOM*SCALE*VIDEO_W/WINDOW_W
        scroll_x = 0 #self.car.hull.position[0] #0
        scroll_y = 0 #self.car.hull.position[1] #-30
        angle = 0 #-self.car.hull.angle #0
        vel = 0 #self.car.hull.linearVelocity #0
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(WINDOW_W/2, WINDOW_H/2)
        # self.transform.set_translation(
        #     WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
        #     WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
        # self.transform.set_rotation(angle)

        self.car.draw(self.viewer)
        if self.num_bots:
            for car in self.bot_cars:
                car.draw(self.viewer)

        arr = None
        win = self.viewer.window
        if mode != 'state_pixels' and mode != 'rgb_array':
            win.switch_to()
            win.dispatch_events()
        if mode=="rgb_array" or mode=="state_pixels":
            win.clear()
            t = self.transform
            self.transform.set_translation(0, 0)
            self.transform.set_scale(0.0167, 0.0167)
            if mode=='rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = WINDOW_W//2 #STATE_W
                VP_H = WINDOW_H//2 #STATE_H
            gl.glViewport(0, 0, VP_W, VP_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            # self.render_indicators(WINDOW_W, WINDOW_H)  # TODO: find why 2x needed, wtf
            image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
            arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
            arr = arr.reshape(VP_H, VP_W, 4)
            arr = arr[::-1, :, 0:3]

        if mode=="rgb_array" and not self.human_render: # agent can call or not call env.render() itself when recording video.
            win.flip()

        if mode=='human':
            self.human_render = True
            win.clear()
            t = self.transform
            gl.glViewport(0, 0, WINDOW_W, WINDOW_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            # self.render_indicators(WINDOW_W, WINDOW_H)
            win.flip()

        self.viewer.onetime_geoms = []
        return arr

    def close(self):
        if self.viewer is not None:
            self.viewer.close()
            self.viewer = None

    def render_road(self):
        # Painting entire field in white:
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(1, 1, 1, 1.0) #gl.glColor4f(0, 0.5, 0, 1.0) #gl.glColor4f(0.4, 0.8, 0.4, 1.0) # change color
        gl.glVertex3f(-PLAYFIELD, PLAYFIELD, 0)
        gl.glVertex3f(PLAYFIELD, PLAYFIELD, 0)
        gl.glVertex3f(PLAYFIELD, -PLAYFIELD, 0)
        gl.glVertex3f(-PLAYFIELD, -PLAYFIELD, 0)
        # Drawing road:
        gl.glColor4f(*ROAD_COLOR, 1)
        for poly in self.road_poly:
            gl.glVertex3f(*poly, 0)
        gl.glEnd()

        # # Drawing tiles:
        # gl.glColor4f(0, 0, 0, 1)
        # for tile in self.tiles_poly:
        #     gl.glBegin(gl.GL_POLYGON)
        #     for t in tile:
        #         gl.glVertex3f(*t, 0)
        #     gl.glEnd()

        # Drawing a sidewalk:
        gl.glColor4f(0, 0, 0, 1) #gl.glColor4f(0.66, 0.66, 0.66, 1)  # change color
        for sw in self.all_sidewalks:
            gl.glBegin(gl.GL_POLYGON)
            for v in sw:
                gl.glVertex3f(*v, 0)
            gl.glEnd()

        # Drawing road crossings:
        gl.glColor4f(0, 0, 0, 1) #gl.glColor4f(1, 1, 1, 1) # change color
        for cros in crossings:
            for temp in cros:
                gl.glBegin(gl.GL_QUADS)
                for v in temp:
                    gl.glVertex3f(*v, 0)
                gl.glEnd()
        for line in cross_line:
            gl.glBegin(gl.GL_LINES)
            for v in line:
                gl.glVertex3f(*v, 0)
            gl.glEnd()

        # # Drawing a rock:
        # gl.glBegin(gl.GL_QUADS)
        # gl.glColor4f(0,0,0,1)
        # gl.glVertex3f(10, 30, 0)
        # gl.glVertex3f(10, 10, 0)
        # gl.glVertex3f(30, 10, 0)
        # gl.glVertex3f(30, 30, 0)
        # gl.glEnd()

        # Drawing target destination for car:
        gl.glBegin(gl.GL_QUADS)
        gl.glColor4f(*self.car.hull.color, 1)
        for v in self.car_goal_poly:
            gl.glVertex3f(*v, 0)
        gl.glEnd()

        # # Drawing car pathes:
        # gl.glPointSize(5)
        # for car in self.bot_cars:
        #     gl.glBegin(gl.GL_POINTS)
        #     gl.glColor4f(*car.hull.color, 1)
        #     gl.glVertex3f(*car.target, 0)
        #     gl.glEnd()
        #
        #     gl.glBegin(gl.GL_LINES)
        #     for v in PATH[car.hull.path]:
        #         gl.glVertex3f(*v, 0)
        #     gl.glEnd()

    def training_status(self, mode='human'):
        if self.viewer is None:
            from gym.envs.classic_control import rendering
            self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
            self.transform = rendering.Transform()

        if "t" not in self.__dict__: return  # reset() not called yet

        zoom = ZOOM*SCALE #0.1*SCALE*max(1-self.t, 0) + ZOOM*SCALE*min(self.t, 1)   # Animate zoom first second
        zoom_state  = ZOOM*SCALE*STATE_W/WINDOW_W
        zoom_video  = ZOOM*SCALE*VIDEO_W/WINDOW_W
        scroll_x = 0 #self.car.hull.position[0] #0
        scroll_y = 0 #self.car.hull.position[1] #-30
        angle = 0 #-self.car.hull.angle #0
        vel = 0 #self.car.hull.linearVelocity #0
        self.transform.set_scale(zoom, zoom)
        self.transform.set_translation(WINDOW_W/2, WINDOW_H/2)
        # self.transform.set_translation(
        #     WINDOW_W/2 - (scroll_x*zoom*math.cos(angle) - scroll_y*zoom*math.sin(angle)),
        #     WINDOW_H/4 - (scroll_x*zoom*math.sin(angle) + scroll_y*zoom*math.cos(angle)) )
        # self.transform.set_rotation(angle)


        arr = None
        win = self.viewer.window
        if mode != 'state_pixels' and mode != 'rgb_array':
            win.switch_to()
            win.dispatch_events()
        if mode=="rgb_array" or mode=="state_pixels":
            win.clear()
            t = self.transform
            self.transform.set_translation(0, 0)
            self.transform.set_scale(0.0167, 0.0167)
            if mode=='rgb_array':
                VP_W = VIDEO_W
                VP_H = VIDEO_H
            else:
                VP_W = WINDOW_W//2 #STATE_W
                VP_H = WINDOW_H//2 #STATE_H
            gl.glViewport(0, 0, VP_W, VP_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()
            t.disable()
            # self.render_indicators(WINDOW_W, WINDOW_H)  # TODO: find why 2x needed, wtf
            image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
            arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
            arr = arr.reshape(VP_H, VP_W, 4)
            arr = arr[::-1, :, 0:3]

        if mode=="rgb_array" and not self.human_render: # agent can call or not call env.render() itself when recording video.
            win.flip()

        if mode=='human':
            self.human_render = True
            win.clear()
            t = self.transform
            gl.glViewport(0, 0, WINDOW_W, WINDOW_H)
            t.enable()
            self.render_road()
            for geom in self.viewer.onetime_geoms:
                geom.render()

            with open("training_positions.csv", 'r') as fin:
                line_number = sum(1 for _ in fin)

            with open("training_positions.csv", 'r') as fin:
                gl.glPointSize(10)
                for i, line in enumerate(fin):
                    epoch, angle, coord_x, coord_y = list(map(float, line.strip().split(",")))
                    new_coord = (angle, coord_x, coord_y)

                    gl.glBegin(gl.GL_POINTS)
                    alpha = (i+1)/line_number
                    gl.glColor4f(alpha, 0, 1-alpha, 0.8)
                    gl.glVertex3f(coord_x, coord_y, 0)
                    gl.glEnd()

            t.disable()
            # self.render_indicators(WINDOW_W, WINDOW_H)
            win.flip()

            # # Drawing a rock:
            # gl.glBegin(gl.GL_QUADS)
            # gl.glColor4f(0,0,0,1)
            # gl.glVertex3f(10, 30, 0)
            # gl.glVertex3f(10, 10, 0)
            # gl.glVertex3f(30, 10, 0)
            # gl.glVertex3f(30, 30, 0)
            # gl.glEnd()

        self.viewer.onetime_geoms = []
        return arr