def train(self):
        MiscUtils.rm_hist()

        for epi in range(20000):
            s = self.sim.reset()
            if self.best_score > Config.max_fitness():
                break

            while 1:
                a = self.renet.choose_action(s)
                s_, done, r = self.sim.step(a)

                self.renet.store_transition(s, a, r, s_)

                if self.renet.memory_counter > MEM_CAP:
                    self.renet.learn()
                    if done:
                        print('episode: {} score: {}'.format(
                            epi, self.sim.travel_range))
                        if self.sim.travel_range > self.best_score:
                            self.best_score = self.sim.travel_range
                            print('*' * 20)
                            print('New best score! score: {}'.format(
                                self.best_score))
                            print('*' * 20)
                            self.sim.save_gif()

                if done:
                    break
                s = s_

        MiscUtils.finish_info()
    def train(self):
        def plot():
            plt.cla()
            if len(self.range_hist) > 2000:
                self.range_hist.pop(0)
            self.range_hist.append(self.sim.travel_range)

            plt.plot(range(len(self.range_hist)),
                     self.range_hist,
                     linewidth=0.5)
            plt.plot(range(len(self.range_hist)), self.range_hist, 'b^-')
            plt.xlabel('Testing number')
            plt.ylabel('Fitness')
            plt.pause(0.01)
            if self.sim.travel_range > Config.max_fitness():
                plt.savefig('res/rl_statistics.png')

        MiscUtils.rm_hist()

        print('*' * 50)
        print('Gathering experience...')
        print('*' * 50)

        for epi in range(20000):
            s = self.sim.reset()
            if self.best_score > Config.max_fitness():
                break

            while 1:
                a = self.renet.choose_action(s)
                s_, done, r = self.sim.step(a)

                self.renet.store_transition(s, a, r, s_)

                if self.renet.memory_counter > MEM_CAP:
                    self.renet.learn()
                    if done:
                        plot()
                        print('episode: {} score: {}'.format(
                            epi, self.sim.travel_range))
                        if self.sim.travel_range > self.best_score:
                            self.best_score = self.sim.travel_range
                            print('*' * 20)
                            print('New best score! score: {}'.format(
                                self.best_score))
                            print('*' * 20)
                            self.sim.save_gif()

                if done:
                    break
                s = s_

        MiscUtils.finish_info()
    def step(self, action):
        def update_range(range, old_pos, new_pos):
            res = np.array(old_pos) - np.array(new_pos)
            return range + (res[0]**2 + res[1]**2)**0.5

        m = self.map.draw_map_bg()
        ImageUtils.draw_car(m, self.pos, self.orientation, self.colliders)
        self.movie.append(m)
        old_pos = self.pos

        self.pos = MiscUtils.get_next_pos(self.pos, self.orientation,
                                          Config.car_speed())
        self.orientation += 4 * (action - N_ACTIONS // 2)
        self.travel_range = update_range(self.travel_range, old_pos, self.pos)

        radar_data = ImageUtils.radar_data(self.pos, self.orientation,
                                           self.colliders)
        done = False
        if ColliderUtils.collision(
            (self.pos, self.orientation),
                self.wall_rects) or self.travel_range > Config.max_fitness():
            done = True

        l1, l2, l3, l4, l5 = radar_data

        def calc():
            side_limit, front_limit = 7, 15
            if l1 < side_limit or l5 < side_limit or l2 < front_limit or \
                    l4 < front_limit or l3 < Config.path_width():
                return -1
            return 1

        r = calc()

        return radar_data, done, r
    def step(self, action):
        def update_range(range, old_pos, new_pos):
            res = np.array(old_pos) - np.array(new_pos)
            return range + (res[0]**2 + res[1]**2)**0.5

        m = self.map.draw_map_bg()
        ImageUtils.draw_car(m, self.pos, self.orientation, self.colliders)
        self.movie.append(m)
        old_pos = self.pos

        self.pos = MiscUtils.get_next_pos(self.pos, self.orientation,
                                          Config.car_speed())
        self.orientation += 4 * (action - N_ACTIONS // 2)
        self.travel_range = update_range(self.travel_range, old_pos, self.pos)

        radar_data = ImageUtils.radar_data(self.pos, self.orientation,
                                           self.colliders)
        done = False
        if ColliderUtils.collision(
            (self.pos, self.orientation),
                self.wall_rects) or self.travel_range > Config.max_fitness():
            done = True

        l1, l2, l3, l4, l5 = radar_data

        r1 = -3 * abs(l1 - l5)
        r2 = -1 * abs(l2 - l4)
        if l3 < Config.path_width():
            r3 = -Config.path_width()
        else:
            r3 = Config.path_width()
        r = r1 + r2 + r3

        return radar_data, done, r
    def train(self):
        MiscUtils.rm_hist()


        local_dir = os.path.dirname(__file__)
        config_path = os.path.join(local_dir, 'config-feedforward')
        config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                             neat.DefaultSpeciesSet, neat.DefaultStagnation,
                             config_path)
        p = neat.Population(config)
        p.add_reporter(neat.StdOutReporter(True))
        stats = neat.StatisticsReporter()
        p.add_reporter(stats)
        p.add_reporter(neat.Checkpointer(50))
        winner = p.run(self.eval_genomes, 5000)

        MiscUtils.finish_info()
    def single_drive_with_whole_population(self, nn_list):

        marker = np.zeros_like(nn_list, dtype=bool)

        def update_range(range, old_pos, new_pos):
            res = np.array(old_pos) - np.array(new_pos)
            return range + (res[0]**2 + res[1]**2)**0.5

        travel_range = np.zeros_like(nn_list)
        movie = []
        # pos = np.full_like(nn_list, (100, 30))
        pos = np.empty_like(nn_list)
        for i in range(len(pos)):
            pos[i] = Config.start_pos()
        orientation = np.full_like(nn_list, 0)

        while 1:

            if np.all(marker):

                if not os.path.exists(Config.result_dir()):
                    os.makedirs(Config.result_dir())

                gif_name = 'res/generation_{}{}'.format(MiscUtils.generation_cnt, self.result_file)
                ImageUtils.save_img_lst_2_gif(movie, gif_name)
                MiscUtils.generation_cnt += 1
                return travel_range

            m = self.map.draw_map_bg()
            for i in range(len(nn_list)):

                p, o, nn = pos[i], orientation[i], nn_list[i]
                if marker[i]:
                    ImageUtils.draw_car(m, p, o, self.colliders, draw_radar=False)
                    continue

                if ColliderUtils.collision((p, o), self.wall_rects) or travel_range[i] > Config.max_fitness():
                    marker[i] = True

                ImageUtils.draw_car(m, p, o, self.colliders)

                # if marker[i]:
                #     continue

                radar_data = ImageUtils.radar_data(p, o, self.colliders)
                pos_new = MiscUtils.get_next_pos(p, o, Config.car_speed())
                travel_range[i] = update_range(travel_range[i], p, pos_new)
                pos[i] = pos_new

                left, right = nn.activate(radar_data)

                clamp = Config.angle_clamp()
                turning = left - right
                turning = clamp if turning > clamp else turning
                turning = -1 * clamp if turning < -1 * clamp else turning
                orientation[i] += turning

            movie.append(m)
Ejemplo n.º 7
0
    def step(self, action):
        def update_range(range, old_pos, new_pos):
            res = np.array(old_pos) - np.array(new_pos)
            return range + (res[0]**2 + res[1]**2)**0.5

        m = self.map.draw_map_bg()
        ImageUtils.draw_car(m,
                            self.pos,
                            self.orientation,
                            self.colliders,
                            speed=self.speed,
                            status=self.travel_range)
        self.movie.append(m)
        old_pos = self.pos

        self.pos = MiscUtils.get_next_pos(self.pos, self.orientation,
                                          self.speed)

        self.orientation += 4 * (action - N_ACTIONS // 2)
        self.travel_range = update_range(self.travel_range, old_pos, self.pos)

        ### SPEED ###
        radar_data = ImageUtils.radar_data(self.pos, self.orientation,
                                           self.colliders)

        l1, l2, l3, l4, l5 = radar_data

        if l3 >= 150 and self.speed < 8:
            self.speed += 0.1
            self.status = 'Accelerating'

        elif l3 < 150 and self.speed > 3:
            self.speed -= 0.2
            self.status = 'Braking'

        if ColliderUtils.collision(
            (self.pos, self.orientation),
                self.wall_rects) or self.travel_range > self.max_fitness:
            done = True
        else:
            done = False

        def calc():
            side_limit, front_limit = 7, 15

            if l1 < side_limit or l5 < side_limit \
                or l2 < front_limit or l4 < front_limit \
                or l3 < Config.path_width():

                return -1

            return 1

        r = calc()

        return radar_data, done, r
Ejemplo n.º 8
0
    def train(self):

        MiscUtils.rm_hist()

        print('*' * 50)
        print('Starting RF Learning')
        print('*' * 50)

        for epi in range(2000000):

            radar_data = self.sim.reset()

            if self.best_score > self.max_fitness:
                break

            while True:
                action = self.renet.choose_action(radar_data)

                radar_data_, done, r = self.sim.step(action)

                self.renet.store_transition(radar_data, action, r, radar_data_)

                if self.renet.memory_counter > MEM_CAP:
                    self.renet.learn()

                    if done:

                        print('episode: {} score: {}'.format(
                            epi, self.sim.travel_range))
                        if self.sim.travel_range > self.best_score:
                            self.best_score = self.sim.travel_range
                            print('*' * 20)
                            print('New best score! score: {}'.format(
                                self.best_score))
                            print('*' * 20)
                            self.sim.save_gif()

                if done:
                    break
                radar_data = radar_data_
    def single_drive_single_car(self):
        def update_orientation(old_orientation):
            return old_orientation + 0.3

        movie = []
        pos = (100, 30)
        orientation = -15
        speed = 2

        while 1:

            if ColliderUtils.collision((pos, orientation), self.wall_rects):
                break
            m = self.map.draw_map_bg()
            ImageUtils.draw_car(m, pos, orientation, self.colliders)
            movie.append(m)
            pos = MiscUtils.get_next_pos(pos, orientation, speed)
            orientation = update_orientation(orientation)

        ImageUtils.save_img_lst_2_gif(movie, self.result_file)
        ImageUtils.play_gif(self.result_file)