def fit_cartpole(*, generations=800, sigma0=0.2):
    """Optimise a small neural CartPole controller with CMA-ES.

    A fresh ``CartPole-v1`` environment is created and published through the
    module-level ``_env`` global, a ``SimpleNeuralControllerNumpy(4, 1, 2, 5)``
    is randomly initialised, and its flat parameter vector is used as the
    starting point of a ``cma.CMAEvolutionStrategy``.  Each generation asks
    the strategy for candidate parameter vectors, scores every candidate with
    ``eval_nn`` and feeds the scores back.

    Args:
        generations: Number of ask/tell iterations to run (default 800).
        sigma0: Initial step size handed to CMA-ES (default 0.2).

    Returns:
        A tuple ``(hist, best, xbest)`` where ``hist`` holds
        ``-es.result.fbest`` recorded after every generation, ``best`` is the
        final ``-es.result.fbest`` and ``xbest`` is ``es.result.xbest``.

    Note:
        The fitness is negated before being reported, which presumably turns
        the minimised cost back into an episode reward -- confirm against
        ``eval_nn``.
    """
    global _env
    _env = gym.make('CartPole-v1')
    try:
        # Episode length cap is set through the private attribute exactly as
        # before; presumably eval_nn runs its episodes on this shared _env.
        _env._max_episode_steps = 500

        nn = SimpleNeuralControllerNumpy(4, 1, 2, 5)
        nn.init_random_params()
        es = cma.CMAEvolutionStrategy(nn.get_parameters(), sigma0)

        hist = []
        for _ in range(generations):
            solutions = es.ask()
            es.tell(solutions, [eval_nn(x) for x in solutions])
            hist.append(-es.result.fbest)

        return hist, -es.result.fbest, es.result.xbest
    finally:
        # Always release the environment, even when an evaluation or a CMA-ES
        # update raises; previously close() was skipped on any exception.
        _env.close()
action = 1 else: action = 0 observation, reward, done, info = env.step(action) total_reward += reward if done: print("Episode finished after %d timesteps" % (t + 1)) break return -total_reward sigma = 1 ### A completer pour optimiser les parametres du reseau de neurones avec CMA-ES ### nn = SimpleNeuralControllerNumpy(4, 1, 2, 5) nn.init_random_params() res = launch_cmaes_full_genotype(nn.get_parameters(), sigma, nbeval=1000, display=True, ma_func=eval_nn) nn.set_parameters(res) env.reset() r = env.step(env.action_space.sample()) # take a random action observations = r[0] reward = r[1] done = r[2] print(nn.predict(observations)) for _ in range(1000):