def test_evolution(self):
    """Smoke-test a single generation: run and score every member, then evolve.

    Creates ``Population(10, 3, 1)``, feeds the fixed input ``[1, 2, 3]`` to
    every member of every species reported by ``get_status()``, gives each
    member a score of 1 and finally calls ``evolve()``. No assertions are
    made; the test passes as long as nothing raises.
    """
    p = Population(10, 3, 1)
    # get_status() is consumed as a mapping of species key -> member count.
    for species, member_count in p.get_status().items():
        for member in range(member_count):
            # The returned value is not inspected; only that run() succeeds.
            p.run(species, member, [1, 2, 3])
            p.set_score(species, member, 1)
    p.evolve()
def test_flow(self):
    """Drive 100 run/score/evolve cycles and print one sample at the end.

    In each cycle every member of every species reported by ``get_status()``
    is run on the fixed input ``[1, 2, 3]``, the first element of its result
    is collected, and the member receives a random integer score from
    ``random.randrange(1, 10)``. The collected values are printed per
    species, then ``evolve()`` is called. After the last cycle the first
    entry of the first species in ``p.population`` is printed via ``Printer``.
    No assertions are made.
    """
    p = Population(10, 3, 1)
    for _ in range(100):
        # get_status() is consumed as a mapping of species key -> member count.
        for species, member_count in p.get_status().items():
            first_outputs = []
            for member in range(member_count):
                result = p.run(species, member, [1, 2, 3])
                first_outputs.append(result[0])
                p.set_score(species, member, random.randrange(1, 10))
            print(species, first_outputs)
        p.evolve()

    # print a sample
    first_species = next(iter(p.population))
    Printer(p.population[first_species][0]).print()
def train():
    """Evolve a population on ``LunarLanderContinuous-v2`` until interrupted.

    A saved population is loaded from ``FULLNAME`` when that file exists;
    otherwise a new ``Population`` is built from the constant 10000 and the
    first dimension of the environment's observation and action spaces
    (presumably population size, input count and output count -- confirm
    against ``Population``).

    Each generation, every member of every species plays one episode. Its
    summed reward becomes its score, the best reward of the generation is
    printed, the state is written to ``AUTOSAVE`` and the population evolves.

    On Ctrl+C the state is saved to ``FULLNAME`` and the process exits via
    ``sys.exit('Bye!')``. The exit happens even when that final save raises
    ``RuntimeError``; previously a failed save sent control back into the
    training loop, so the process could not be stopped with Ctrl+C while
    saving kept failing.
    """
    env = gym.make('LunarLanderContinuous-v2')
    try:
        p = Population.load(FULLNAME)
        print('Existing state loaded')
    except FileNotFoundError as e:
        print(str(e) + '. Creating new state')
        p = Population(10000, env.observation_space.shape[0], env.action_space.shape[0])

    while True:
        try:
            max_reward = -99999
            # get_status() is consumed as a mapping of species key -> member count.
            for species, member_count in p.get_status().items():
                for member in range(member_count):
                    ob = env.reset()
                    reward_sum = 0
                    done = False
                    while not done:
                        action = action_final_activation(p.run(species, member, ob))
                        ob, reward, done, _ = env.step(action)
                        reward_sum = reward_sum + reward
                    p.set_score(species, member, reward_sum)
                    max_reward = np.max([reward_sum, max_reward])
            print(p.generation, max_reward, p.population.keys())
            # Autosave is best-effort: a failure is reported and training goes on.
            try:
                p.save(AUTOSAVE)
            except RuntimeError as e:
                print('error saving: {}'.format(str(e)))
            p.evolve()
        except KeyboardInterrupt:
            print('\nsaving before exit')
            try:
                p.save(FULLNAME)
            except RuntimeError as e:
                print('error saving: {}'.format(str(e)))
            # Exit whether or not the save worked; the user asked to stop.
            sys.exit('Bye!')
min_position = np.min([min_position, ob[0]]) reward_sum = reward_sum + reward if done: break reward_sum = reward_sum + ((max_position + 1.2) - (min_position + 1.2)) max_reward = np.max([reward_sum, max_reward]) p.set_score(s, i, reward_sum) if max_position >= target_reward: winner = (s, i) break if max_position >= target_reward: break print('Generation: {} Score: {} Max position: {} Population: {}'.format(p.generation, reward_sum, max_position, p.population.keys())) if max_position >= target_reward: break p.evolve() print('Species {} is the winner'.format(winner[0])) ob = env.reset() while True: action = action = p.run(winner[0], winner[1], ob) ob, reward, done, info = env.step(np.argmax(action)) env.render() if done: ob = env.reset()