def fit_cartpole():
    global _env
    hist = []

    _env = gym.make('CartPole-v1')
    _env._max_episode_steps = 500
    nn = SimpleNeuralControllerNumpy(4, 1, 2, 5)
    nn.init_random_params()

    es = cma.CMAEvolutionStrategy(nn.get_parameters(), 0.2)

    for _ in range(800):
        solutions = es.ask()
        es.tell(solutions, [eval_nn(x) for x in solutions])
        hist.append(-es.result.fbest)

    _env.close()

    return hist, -es.result.fbest, es.result.xbest
コード例 #2
0
            action = 1
        else:
            action = 0
        observation, reward, done, info = env.step(action)
        total_reward += reward
        if done:
            print("Episode finished after %d timesteps" % (t + 1))
            break
    return -total_reward


sigma = 1
### A completer pour optimiser les parametres du reseau de neurones avec CMA-ES ###

nn = SimpleNeuralControllerNumpy(4, 1, 2, 5)
nn.init_random_params()
res = launch_cmaes_full_genotype(nn.get_parameters(),
                                 sigma,
                                 nbeval=1000,
                                 display=True,
                                 ma_func=eval_nn)
nn.set_parameters(res)

env.reset()

r = env.step(env.action_space.sample())  # take a random action
observations = r[0]
reward = r[1]
done = r[2]
print(nn.predict(observations))
for _ in range(1000):