def train(self): MiscUtils.rm_hist() for epi in range(20000): s = self.sim.reset() if self.best_score > Config.max_fitness(): break while 1: a = self.renet.choose_action(s) s_, done, r = self.sim.step(a) self.renet.store_transition(s, a, r, s_) if self.renet.memory_counter > MEM_CAP: self.renet.learn() if done: print('episode: {} score: {}'.format( epi, self.sim.travel_range)) if self.sim.travel_range > self.best_score: self.best_score = self.sim.travel_range print('*' * 20) print('New best score! score: {}'.format( self.best_score)) print('*' * 20) self.sim.save_gif() if done: break s = s_ MiscUtils.finish_info()
def train(self): def plot(): plt.cla() if len(self.range_hist) > 2000: self.range_hist.pop(0) self.range_hist.append(self.sim.travel_range) plt.plot(range(len(self.range_hist)), self.range_hist, linewidth=0.5) plt.plot(range(len(self.range_hist)), self.range_hist, 'b^-') plt.xlabel('Testing number') plt.ylabel('Fitness') plt.pause(0.01) if self.sim.travel_range > Config.max_fitness(): plt.savefig('res/rl_statistics.png') MiscUtils.rm_hist() print('*' * 50) print('Gathering experience...') print('*' * 50) for epi in range(20000): s = self.sim.reset() if self.best_score > Config.max_fitness(): break while 1: a = self.renet.choose_action(s) s_, done, r = self.sim.step(a) self.renet.store_transition(s, a, r, s_) if self.renet.memory_counter > MEM_CAP: self.renet.learn() if done: plot() print('episode: {} score: {}'.format( epi, self.sim.travel_range)) if self.sim.travel_range > self.best_score: self.best_score = self.sim.travel_range print('*' * 20) print('New best score! score: {}'.format( self.best_score)) print('*' * 20) self.sim.save_gif() if done: break s = s_ MiscUtils.finish_info()
def step(self, action): def update_range(range, old_pos, new_pos): res = np.array(old_pos) - np.array(new_pos) return range + (res[0]**2 + res[1]**2)**0.5 m = self.map.draw_map_bg() ImageUtils.draw_car(m, self.pos, self.orientation, self.colliders) self.movie.append(m) old_pos = self.pos self.pos = MiscUtils.get_next_pos(self.pos, self.orientation, Config.car_speed()) self.orientation += 4 * (action - N_ACTIONS // 2) self.travel_range = update_range(self.travel_range, old_pos, self.pos) radar_data = ImageUtils.radar_data(self.pos, self.orientation, self.colliders) done = False if ColliderUtils.collision( (self.pos, self.orientation), self.wall_rects) or self.travel_range > Config.max_fitness(): done = True l1, l2, l3, l4, l5 = radar_data def calc(): side_limit, front_limit = 7, 15 if l1 < side_limit or l5 < side_limit or l2 < front_limit or \ l4 < front_limit or l3 < Config.path_width(): return -1 return 1 r = calc() return radar_data, done, r
def step(self, action): def update_range(range, old_pos, new_pos): res = np.array(old_pos) - np.array(new_pos) return range + (res[0]**2 + res[1]**2)**0.5 m = self.map.draw_map_bg() ImageUtils.draw_car(m, self.pos, self.orientation, self.colliders) self.movie.append(m) old_pos = self.pos self.pos = MiscUtils.get_next_pos(self.pos, self.orientation, Config.car_speed()) self.orientation += 4 * (action - N_ACTIONS // 2) self.travel_range = update_range(self.travel_range, old_pos, self.pos) radar_data = ImageUtils.radar_data(self.pos, self.orientation, self.colliders) done = False if ColliderUtils.collision( (self.pos, self.orientation), self.wall_rects) or self.travel_range > Config.max_fitness(): done = True l1, l2, l3, l4, l5 = radar_data r1 = -3 * abs(l1 - l5) r2 = -1 * abs(l2 - l4) if l3 < Config.path_width(): r3 = -Config.path_width() else: r3 = Config.path_width() r = r1 + r2 + r3 return radar_data, done, r
def train(self): MiscUtils.rm_hist() local_dir = os.path.dirname(__file__) config_path = os.path.join(local_dir, 'config-feedforward') config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction, neat.DefaultSpeciesSet, neat.DefaultStagnation, config_path) p = neat.Population(config) p.add_reporter(neat.StdOutReporter(True)) stats = neat.StatisticsReporter() p.add_reporter(stats) p.add_reporter(neat.Checkpointer(50)) winner = p.run(self.eval_genomes, 5000) MiscUtils.finish_info()
def single_drive_with_whole_population(self, nn_list): marker = np.zeros_like(nn_list, dtype=bool) def update_range(range, old_pos, new_pos): res = np.array(old_pos) - np.array(new_pos) return range + (res[0]**2 + res[1]**2)**0.5 travel_range = np.zeros_like(nn_list) movie = [] # pos = np.full_like(nn_list, (100, 30)) pos = np.empty_like(nn_list) for i in range(len(pos)): pos[i] = Config.start_pos() orientation = np.full_like(nn_list, 0) while 1: if np.all(marker): if not os.path.exists(Config.result_dir()): os.makedirs(Config.result_dir()) gif_name = 'res/generation_{}{}'.format(MiscUtils.generation_cnt, self.result_file) ImageUtils.save_img_lst_2_gif(movie, gif_name) MiscUtils.generation_cnt += 1 return travel_range m = self.map.draw_map_bg() for i in range(len(nn_list)): p, o, nn = pos[i], orientation[i], nn_list[i] if marker[i]: ImageUtils.draw_car(m, p, o, self.colliders, draw_radar=False) continue if ColliderUtils.collision((p, o), self.wall_rects) or travel_range[i] > Config.max_fitness(): marker[i] = True ImageUtils.draw_car(m, p, o, self.colliders) # if marker[i]: # continue radar_data = ImageUtils.radar_data(p, o, self.colliders) pos_new = MiscUtils.get_next_pos(p, o, Config.car_speed()) travel_range[i] = update_range(travel_range[i], p, pos_new) pos[i] = pos_new left, right = nn.activate(radar_data) clamp = Config.angle_clamp() turning = left - right turning = clamp if turning > clamp else turning turning = -1 * clamp if turning < -1 * clamp else turning orientation[i] += turning movie.append(m)
def step(self, action): def update_range(range, old_pos, new_pos): res = np.array(old_pos) - np.array(new_pos) return range + (res[0]**2 + res[1]**2)**0.5 m = self.map.draw_map_bg() ImageUtils.draw_car(m, self.pos, self.orientation, self.colliders, speed=self.speed, status=self.travel_range) self.movie.append(m) old_pos = self.pos self.pos = MiscUtils.get_next_pos(self.pos, self.orientation, self.speed) self.orientation += 4 * (action - N_ACTIONS // 2) self.travel_range = update_range(self.travel_range, old_pos, self.pos) ### SPEED ### radar_data = ImageUtils.radar_data(self.pos, self.orientation, self.colliders) l1, l2, l3, l4, l5 = radar_data if l3 >= 150 and self.speed < 8: self.speed += 0.1 self.status = 'Accelerating' elif l3 < 150 and self.speed > 3: self.speed -= 0.2 self.status = 'Braking' if ColliderUtils.collision( (self.pos, self.orientation), self.wall_rects) or self.travel_range > self.max_fitness: done = True else: done = False def calc(): side_limit, front_limit = 7, 15 if l1 < side_limit or l5 < side_limit \ or l2 < front_limit or l4 < front_limit \ or l3 < Config.path_width(): return -1 return 1 r = calc() return radar_data, done, r
def train(self): MiscUtils.rm_hist() print('*' * 50) print('Starting RF Learning') print('*' * 50) for epi in range(2000000): radar_data = self.sim.reset() if self.best_score > self.max_fitness: break while True: action = self.renet.choose_action(radar_data) radar_data_, done, r = self.sim.step(action) self.renet.store_transition(radar_data, action, r, radar_data_) if self.renet.memory_counter > MEM_CAP: self.renet.learn() if done: print('episode: {} score: {}'.format( epi, self.sim.travel_range)) if self.sim.travel_range > self.best_score: self.best_score = self.sim.travel_range print('*' * 20) print('New best score! score: {}'.format( self.best_score)) print('*' * 20) self.sim.save_gif() if done: break radar_data = radar_data_
def single_drive_single_car(self): def update_orientation(old_orientation): return old_orientation + 0.3 movie = [] pos = (100, 30) orientation = -15 speed = 2 while 1: if ColliderUtils.collision((pos, orientation), self.wall_rects): break m = self.map.draw_map_bg() ImageUtils.draw_car(m, pos, orientation, self.colliders) movie.append(m) pos = MiscUtils.get_next_pos(pos, orientation, speed) orientation = update_orientation(orientation) ImageUtils.save_img_lst_2_gif(movie, self.result_file) ImageUtils.play_gif(self.result_file)