def train(self):
    """Run the DQN training loop.

    Trains for up to 20000 episodes, stopping early once the best score
    exceeds the configured maximum fitness.  Learning only starts after
    the replay memory holds more than MEM_CAP transitions.
    """
    MiscUtils.rm_hist()
    for episode in range(20000):
        state = self.sim.reset()
        # Stop as soon as a previous episode already beat the fitness cap.
        if self.best_score > Config.max_fitness():
            break
        while True:
            action = self.renet.choose_action(state)
            next_state, done, reward = self.sim.step(action)
            self.renet.store_transition(state, action, reward, next_state)
            # Only learn once the replay buffer is sufficiently full.
            if self.renet.memory_counter > MEM_CAP:
                self.renet.learn()
            if done:
                print('episode: {} score: {}'.format(
                    episode, self.sim.travel_range))
                if self.sim.travel_range > self.best_score:
                    self.best_score = self.sim.travel_range
                    print('*' * 20)
                    print('New best score! score: {}'.format(
                        self.best_score))
                    print('*' * 20)
                    self.sim.save_gif()
                break
            state = next_state
    MiscUtils.finish_info()
def train(self):
    """Run the DQN training loop with live plotting of the fitness history."""

    def refresh_plot():
        # Redraw the rolling fitness curve, keeping at most 2000 points.
        plt.cla()
        if len(self.range_hist) > 2000:
            self.range_hist.pop(0)
        self.range_hist.append(self.sim.travel_range)
        xs = range(len(self.range_hist))
        # Thin base line plus blue triangle markers on the same data.
        plt.plot(xs, self.range_hist, linewidth=0.5)
        plt.plot(xs, self.range_hist, 'b^-')
        plt.xlabel('Testing number')
        plt.ylabel('Fitness')
        plt.pause(0.01)
        # Persist the statistics figure once the fitness cap is reached.
        if self.sim.travel_range > Config.max_fitness():
            plt.savefig('res/rl_statistics.png')

    MiscUtils.rm_hist()
    print('*' * 50)
    print('Gathering experience...')
    print('*' * 50)
    for episode in range(20000):
        state = self.sim.reset()
        # Stop as soon as a previous episode already beat the fitness cap.
        if self.best_score > Config.max_fitness():
            break
        while True:
            action = self.renet.choose_action(state)
            next_state, done, reward = self.sim.step(action)
            self.renet.store_transition(state, action, reward, next_state)
            # Only learn once the replay buffer is sufficiently full.
            if self.renet.memory_counter > MEM_CAP:
                self.renet.learn()
            if done:
                refresh_plot()
                print('episode: {} score: {}'.format(
                    episode, self.sim.travel_range))
                if self.sim.travel_range > self.best_score:
                    self.best_score = self.sim.travel_range
                    print('*' * 20)
                    print('New best score! score: {}'.format(
                        self.best_score))
                    print('*' * 20)
                    self.sim.save_gif()
                break
            state = next_state
    MiscUtils.finish_info()
def train(self):
    """Evolve a population with NEAT using the local feedforward config.

    Loads 'config-feedforward' from this module's directory, attaches
    progress/statistics reporters plus a checkpoint every 50 generations,
    and runs evolution for up to 5000 generations.
    """
    MiscUtils.rm_hist()
    config_path = os.path.join(os.path.dirname(__file__), 'config-feedforward')
    neat_config = neat.Config(neat.DefaultGenome,
                              neat.DefaultReproduction,
                              neat.DefaultSpeciesSet,
                              neat.DefaultStagnation,
                              config_path)
    population = neat.Population(neat_config)
    population.add_reporter(neat.StdOutReporter(True))
    population.add_reporter(neat.StatisticsReporter())
    # Checkpoint the population every 50 generations.
    population.add_reporter(neat.Checkpointer(50))
    winner = population.run(self.eval_genomes, 5000)
    MiscUtils.finish_info()
def train(self):
    """Run the DQN training loop over radar observations.

    Trains for up to 2000000 episodes, stopping early once the best
    score exceeds ``self.max_fitness``.  Learning only starts after the
    replay memory holds more than MEM_CAP transitions.  On each finished
    episode the score is printed, and a gif is saved whenever a new best
    score is reached.
    """
    MiscUtils.rm_hist()
    print('*' * 50)
    print('Starting RF Learning')
    print('*' * 50)
    for epi in range(2000000):
        radar_data = self.sim.reset()
        # Stop as soon as a previous episode already beat the fitness cap.
        if self.best_score > self.max_fitness:
            break
        while True:
            action = self.renet.choose_action(radar_data)
            radar_data_, done, r = self.sim.step(action)
            self.renet.store_transition(radar_data, action, r, radar_data_)
            # Only learn once the replay buffer is sufficiently full.
            if self.renet.memory_counter > MEM_CAP:
                self.renet.learn()
            if done:
                print('episode: {} score: {}'.format(
                    epi, self.sim.travel_range))
                if self.sim.travel_range > self.best_score:
                    self.best_score = self.sim.travel_range
                    print('*' * 20)
                    print('New best score! score: {}'.format(
                        self.best_score))
                    print('*' * 20)
                    self.sim.save_gif()
                break
            radar_data = radar_data_
    # Fix: sibling train() implementations all finish with this cleanup
    # call; this variant was missing it.
    MiscUtils.finish_info()