Ejemplo n.º 1
0
def learn_cartpole(config_path):
    """Evolve a state-machine controller for CartPole-v0 and evaluate the winner.

    The episode-length cap (``max_evaluation_steps``) and the generation
    budget (``num_generations``) are read from module-level names.

    Args:
        config_path: Path of the NEAT configuration file passed to ``Config``.
    """
    env = gym.make('CartPole-v0')
    try:
        # Override the episode-length limit of the environment wrapper.
        env._max_episode_steps = max_evaluation_steps

        # Load configuration.
        config = Config(StateMachineGenome, DefaultReproduction,
                        DefaultSpeciesSet, DefaultStagnation,
                        config_path)

        # Create the population, which is the top-level object for a NEAT run.
        p = Population(config)

        # Add a stdout reporter to show progress in the terminal.
        p.add_reporter(StdOutReporter(True))
        stats = StatisticsReporter()
        p.add_reporter(stats)

        def evaluate(genomes, config):
            # Bind the shared environment so the population only has to
            # supply the (genomes, config) pair.
            return eval_genomes(genomes, config, env=env)

        # Run for up to the given number of generations.
        winner = p.run(evaluate, num_generations)

        # Pause so the user decides when the winner is evaluated.
        input("Press Enter to continue...")

        net = StateMachineNetwork.create(winner, config)
        eval_network(net, env, True)
    finally:
        # Release the environment's resources even if training or the final
        # evaluation raises.
        env.close()
Ejemplo n.º 2
0
class NEATRunner(BaseTrainer):
    """Trainer that evolves genomes with a ``Population`` until a budget runs out.

    Fitness is computed either serially through the evaluator (when
    ``num_workers`` is ``None``) or through a ``ParallelEvaluator``.
    """

    def __init__(
            self,
            config: Config,
            evaluator: GymEvaluator,
            reporters: Optional[Sequence[BaseReporter]] = None,
            num_workers: Optional[int] = multiprocessing.cpu_count(),
    ):
        """Create the population and attach the reporters.

        Args:
            config: Configuration used to build the ``Population``.
            evaluator: Evaluator that scores a genome and reports how many
                frames the evaluation consumed.
            reporters: Optional reporters added to the population.
            num_workers: Worker count for parallel evaluation; ``None``
                selects serial evaluation. The default is the CPU count,
                computed once when the class is defined.
        """
        self._evaluator = evaluator

        self._population = Population(config)

        for reporter in reporters or []:
            self._population.add_reporter(reporter)

        self._num_workers = num_workers

    def _train(self, num_frames: Optional[int],
               stop_time: Optional[int]) -> DefaultGenome:
        """Run the evolution until a frame or time budget stops it.

        Args:
            num_frames: Frame budget; ``None`` means no frame limit.
            stop_time: Time budget forwarded to ``_timeout_func``; ``None``
                disables the time limit.

        Returns:
            The result of ``Population.run``, or the population's
            ``best_genome`` when a ``TimeoutError`` ends the run.
        """
        if self._num_workers is None:
            # ``None`` means "no frame limit". Comparing the frame counter
            # against ``None`` would raise TypeError, so map it to infinity
            # (the parallel branch below does the equivalent).
            max_num_frames = float('inf') if num_frames is None else num_frames

            def func(genomes, config):
                return self._evaluate_population_fitness(
                    genomes, config, max_num_frames)
        else:
            parallel = ParallelEvaluator(
                num_workers=self._num_workers,
                evaluator=self._evaluator,
                max_num_frames=num_frames or float('inf'),
            )
            func = parallel.evaluate

        if stop_time is not None:
            # it may not be 100% reliable but it's the best we can achieve without writing a custom
            # parallel executor
            func = _timeout_func(func, time(), stop_time)

        try:
            # No generation limit: only a TimeoutError raised by the fitness
            # function (or the population's own termination) ends the run.
            return self._population.run(
                fitness_function=func,
                n=float('inf'),
            )
        except TimeoutError:
            return self._population.best_genome

    def _evaluate_population_fitness(
        self,
        genomes: Sequence[Tuple[int, DefaultGenome]],
        config: Config,
        max_num_frames: float,
    ):
        """Serially evaluate every genome and accumulate the frames used.

        Args:
            genomes: ``(id, genome)`` pairs; each genome's ``fitness`` is set.
            config: Configuration forwarded to the evaluator.
            max_num_frames: Frame budget (may be ``float('inf')``).

        Raises:
            TimeoutError: If the evaluator's frame counter has already
                reached ``max_num_frames`` when this generation starts.
        """
        # The budget is checked once per generation, so the generation that
        # crosses the limit is still evaluated in full.
        if self._evaluator.num_frames >= max_num_frames:
            raise TimeoutError()

        for _, genome in genomes:
            genome.fitness, num_frames = self._evaluator.evaluate(
                genome, config)
            self._evaluator.num_frames += num_frames
Ejemplo n.º 3
0
 def test_evolution(self):
     """Smoke test: run and score every member once, then evolve one generation."""
     population = Population(10, 3, 1)
     status = population.get_status()
     for species in status:
         member_count = status.get(species, 0)
         for member in range(member_count):
             # The outputs are not inspected; the call only has to succeed.
             population.run(species, member, [1, 2, 3])
             population.set_score(species, member, 1)
     population.evolve()
Ejemplo n.º 4
0
    def test_flow(self):
        """Smoke test: evolve 100 generations with random scores, then print one genome."""
        population = Population(10, 3, 1)
        generations = 100
        for _generation in range(generations):
            status = population.get_status()
            for species in status:
                first_outputs = []
                member_count = status.get(species, 0)
                for member in range(member_count):
                    result = population.run(species, member, [1, 2, 3])
                    first_outputs.append(result[0])
                    # Random scores: this test exercises the flow, not learning.
                    score = random.randrange(1, 10)
                    population.set_score(species, member, score)
                print(species, first_outputs)
            population.evolve()

        # Print a sample: element 0 of the first entry in the population mapping.
        groups = population.population
        first_key = next(iter(groups))
        printer = Printer(groups[first_key][0])
        printer.print()
Ejemplo n.º 5
0
def train():
    """Train a population on LunarLanderContinuous-v2 in an endless loop.

    State is loaded from ``FULLNAME`` when that file exists; otherwise a new
    population is created. After every generation the state is saved to
    ``AUTOSAVE``. On Ctrl-C the state is saved to ``FULLNAME`` and the process
    exits; if that final save raises ``RuntimeError`` the error is printed and
    training continues.
    """
    env = gym.make('LunarLanderContinuous-v2')

    try:
        population = Population.load(FULLNAME)
    except FileNotFoundError as e:
        print(str(e) + '. Creating new state')
        population = Population(10000, env.observation_space.shape[0],
                                env.action_space.shape[0])
    else:
        print('Existing state loaded')

    def run_episode(species, index):
        # Play one full episode with the given individual and return the
        # summed reward.
        observation = env.reset()
        total = 0
        finished = False
        while not finished:
            raw_action = population.run(species, index, observation)
            action = action_final_activation(raw_action)
            observation, reward, finished, _info = env.step(action)
            total = total + reward
        return total

    while True:
        try:
            best = -99999
            status = population.get_status()
            for species in status:
                for index in range(status.get(species, 0)):
                    score = run_episode(species, index)
                    population.set_score(species, index, score)
                    best = np.max([score, best])
            print(population.generation, best, population.population.keys())

            # Autosave is best-effort: report a failure and keep training.
            try:
                population.save(AUTOSAVE)
            except RuntimeError as save_error:
                print('error saving: {}'.format(str(save_error)))

            population.evolve()

        except KeyboardInterrupt:
            # Persist the state before leaving; exit only if the save worked.
            try:
                print('\nsaving before exit')
                population.save(FULLNAME)
                sys.exit('Bye!')
            except RuntimeError as save_error:
                print('error saving: {}'.format(str(save_error)))
Ejemplo n.º 6
0
# Evolve a Population against a Gym-style `env` (created outside this view),
# scoring each individual by its episode reward plus a bonus for the range of
# ob[0] values seen so far.
# NOTE(review): passing_score is not used anywhere in the visible lines.
passing_score = 500
p = Population(1000, 2, 3)
# Threshold compared against max_position (not against a reward) below.
target_reward = 0.5
max_reward = -999999
winner = None
# Running extremes of ob[0]. max_position starts low and min_position starts
# high so the np.max / np.min updates below can move them; presumably these
# are the environment's position bounds -- TODO confirm.
# NOTE(review): neither value is reset in the visible code, so they accumulate
# across individuals and generations -- confirm this is intended.
max_position = -1.2
min_position = 0.6
while True:
    status = p.get_status()
    for s in status.keys():
        # NOTE(review): `output` is never filled or read in the visible lines.
        output = []
        for i in range(status.get(s, 0)):
            ob = env.reset()
            # Starts at 200 rather than 0 -- presumably to offset negative
            # per-step rewards; TODO confirm.
            reward_sum = 200
            while True:
                action = p.run(s, i, ob)
                # The index of the largest output is used as the discrete action.
                ob, reward, done, info = env.step(np.argmax(action))
                max_position = np.max([max_position, ob[0]])
                min_position = np.min([min_position, ob[0]])
                reward_sum = reward_sum + reward
                if done:
                    break

            # Bonus equal to max_position - min_position (the +1.2 terms cancel).
            reward_sum = reward_sum + ((max_position + 1.2) - (min_position + 1.2))
            max_reward = np.max([reward_sum, max_reward])
            p.set_score(s, i, reward_sum)

            # Record the individual at which max_position reached the threshold.
            # This break leaves only the inner `for i` loop; the species loop
            # and the `while True` keep going in the visible code.
            if max_position >= target_reward:
                winner = (s, i)
                break