def learn_cartpole(config_path):
    """Evolve a state-machine controller for CartPole-v0 and replay the winner.

    Builds a NEAT population from the configuration at ``config_path``,
    runs it for ``num_generations`` generations (module-level setting),
    waits for the user to press Enter, then evaluates the winning genome's
    network on the same environment.

    Args:
        config_path: Path of the NEAT configuration file handed to ``Config``.
    """
    env = gym.make('CartPole-v0')
    try:
        # Override the environment's episode step limit with the
        # module-level ``max_evaluation_steps`` setting.
        env._max_episode_steps = max_evaluation_steps

        # Load configuration.
        config = Config(StateMachineGenome, DefaultReproduction,
                        DefaultSpeciesSet, DefaultStagnation,
                        config_path)

        # Create the population, which is the top-level object for a NEAT run.
        population = Population(config)

        # Add a stdout reporter to show progress in the terminal.
        population.add_reporter(StdOutReporter(True))
        stats = StatisticsReporter()
        population.add_reporter(stats)

        def fitness_function(genomes, run_config):
            # Bind the shared environment so every genome is evaluated on it.
            return eval_genomes(genomes, run_config, env=env)

        # Run for up to the given number of generations.
        winner = population.run(fitness_function, num_generations)

        input("Press Enter to continue...")

        net = StateMachineNetwork.create(winner, config)
        # NOTE(review): the third argument presumably enables rendering --
        # confirm against eval_network.
        eval_network(net, env, True)
    finally:
        # Release the environment's resources even when training or the
        # replay raises.
        env.close()
# Example #2
# 0
class NEATRunner(BaseTrainer):
    """Trainer that evolves genomes with a NEAT ``Population``.

    Fitness is computed either serially in this process (``num_workers`` is
    ``None``) or through a ``ParallelEvaluator`` with ``num_workers`` workers.
    """

    def __init__(
            self,
            config: Config,
            evaluator: GymEvaluator,
            reporters: Optional[Sequence[BaseReporter]] = None,
            num_workers: Optional[int] = multiprocessing.cpu_count(),
    ):
        """Create the population and attach the given reporters.

        Args:
            config: NEAT configuration used to build the ``Population``.
            evaluator: Evaluator that scores a single genome; its
                ``num_frames`` counter is updated during serial evaluation.
            reporters: Optional reporters to register on the population.
            num_workers: Number of workers for parallel evaluation; ``None``
                selects serial evaluation. The default is computed once,
                when the class is defined.
        """
        self._evaluator = evaluator

        self._population = Population(config)

        for reporter in reporters or ():
            self._population.add_reporter(reporter)

        self._num_workers = num_workers

    def _train(self, num_frames: Optional[int],
               stop_time: Optional[int]) -> DefaultGenome:
        """Run the population until a ``TimeoutError`` stops it.

        Args:
            num_frames: Frame budget; ``None`` means no frame limit.
            stop_time: Optional time limit forwarded to ``_timeout_func``
                together with the current ``time()``.

        Returns:
            The genome returned by ``Population.run``, or the population's
            ``best_genome`` when the run is interrupted by ``TimeoutError``.
        """
        if self._num_workers is None:
            def func(genomes, config):
                # Serial path: evaluate every genome in this process.
                return self._evaluate_population_fitness(
                    genomes, config, num_frames)
        else:
            parallel = ParallelEvaluator(
                num_workers=self._num_workers,
                evaluator=self._evaluator,
                max_num_frames=num_frames or float('inf'),
            )
            func = parallel.evaluate

        if stop_time is not None:
            # it may not be 100% reliable but it's the best we can achieve without writing a custom
            # parallel executor
            func = _timeout_func(func, time(), stop_time)

        try:
            # No generation limit: the run ends only via TimeoutError or
            # whatever termination Population.run applies itself.
            return self._population.run(
                fitness_function=func,
                n=float('inf'),
            )
        except TimeoutError:
            return self._population.best_genome

    def _evaluate_population_fitness(
        self,
        genomes: Sequence[Tuple[int, DefaultGenome]],
        config: Config,
        max_num_frames: Optional[int],
    ):
        """Serially evaluate ``genomes`` and assign each one's fitness.

        Args:
            genomes: ``(id, genome)`` pairs to evaluate.
            config: Configuration forwarded to the evaluator.
            max_num_frames: Frame budget; ``None`` means no limit.

        Raises:
            TimeoutError: If the evaluator has already consumed at least
                ``max_num_frames`` frames before this call.
        """
        # A missing budget must not be compared against the frame counter:
        # ``int >= None`` raises TypeError in Python 3.
        if (max_num_frames is not None
                and self._evaluator.num_frames >= max_num_frames):
            raise TimeoutError()

        for _, genome in genomes:
            genome.fitness, num_frames = self._evaluator.evaluate(
                genome, config)
            # Accumulate consumed frames so the next call can check the budget.
            self._evaluator.num_frames += num_frames