def test_acer():
    """Smoke-test the ACER agent on a 5-D integer Sphere minimization problem.

    Builds an integer search space in [-100, 100] per dimension, wraps the
    Sphere objective in an RL environment, trains ACER for 2000 timesteps,
    and prints the best solution logged by the callback.
    """

    def Sphere(individual):
        """Sphere test objective function.

        F(x) = sum_{i=1}^d xi^2
        d = 1, 2, 3, ...
        Range: [-100, 100]
        Minima: 0
        """
        return sum(x**2 for x in individual)

    # build a 5-dimensional integer search space
    nx = 5
    bounds = {}
    for i in range(1, nx + 1):
        bounds['x' + str(i)] = ['int', -100, 100]

    # create an environment class
    env = CreateEnvironment(method='acer', fit=Sphere, bounds=bounds,
                            mode='min', episode_length=50)
    # create a callback function to log data
    cb = RLLogger(check_freq=1, mode='min')
    # create an acer object based on the env object
    acer = ACER(MlpPolicy, env=env, n_steps=25, q_coef=0.55, ent_coef=0.02)
    # optimise the environment class
    acer.learn(total_timesteps=2000, callback=cb)
    # print the best results
    print('--------------- ACER results ---------------')
    print('The best value of x found:', cb.xbest)
    print('The best value of y found:', cb.rbest)
# NOTE(review): this chunk is the interior of an ACKTR test function whose
# `def` header is outside the visible source — TODO confirm and re-attach the
# enclosing definition when the full file is available.

def Sphere(individual):
    """Sphere test objective function.

    F(x) = sum_{i=1}^d xi^2
    d = 1, 2, 3, ...
    Range: [-100, 100]
    Minima: 0
    """
    return sum(x**2 for x in individual)

# build a 5-dimensional continuous search space in [-10, 10]
nx = 5
bounds = {}
for i in range(1, nx + 1):
    bounds['x' + str(i)] = ['float', -10, 10]

# create an environment class
env = CreateEnvironment(method='acktr', fit=Sphere, bounds=bounds,
                        mode='min', episode_length=50)
# create a callback function to log data
cb = RLLogger(check_freq=1, mode='min')
# create an acktr object based on the env object
acktr = ACKTR(MlpPolicy, env=env, n_steps=12, seed=1)
# optimise the environment class
acktr.learn(total_timesteps=2000, callback=cb)
# print the best results
print('--------------- ACKTR results ---------------')
print('The best value of x found:', cb.xbest)
print('The best value of y found:', cb.rbest)
# NOTE(review): whitespace-mangled fragment kept byte-identical. The leading
# statements (`p.add_reporter(...)`, `pickle.dump(winner, ...)`,
# `return self.best_x, self.best_fit_correct, self.history`) are the tail of a
# NEAT wrapper method (uses `self` and a bare `return`) whose enclosing `def`
# is not in view, fused with a separate NEAT test script (bounds setup,
# CreateEnvironment, NEAT(...).evolute, matplotlib plotting). The original
# line breaks were lost — TODO: restore the multi-line form from version
# control; the single-line form below is not valid Python as written.
p.add_reporter(stats) p.add_reporter(neat.Checkpointer(10)) winner = p.run(self.eval_genomes, ngen) with open('winner.pkl', 'wb') as output: pickle.dump(winner, output, 1) return self.best_x, self.best_fit_correct, self.history nx = 10 bounds = {} for i in range(1, nx + 1): bounds['x' + str(i)] = ['float', -100, 100] #create an enviroment class env = CreateEnvironment(method='neat', fit=Sphere, bounds=bounds, mode='max', episode_length=50) neats = NEAT(env=env, config='config-feedforward') x_best, y_best, neat_hist = neats.evolute(ngen=60) assert Sphere(x_best) == y_best print(x_best, y_best) import matplotlib.pyplot as plt plt.figure() plt.plot(neat_hist['global_fitness']) #plt.plot(neat_hist['local_fitness'])
# NOTE(review): this chunk is the interior of a test script whose surrounding
# context (the Vessel objective and the rest of the A2C section) is outside
# the visible source — TODO confirm against the full file.

# ---------------------------------
# mixed search space for the pressure-vessel problem:
# integer thickness index, gridded head thickness, continuous radius/length
# ---------------------------------
bounds = {}
bounds['x1'] = ['int', 1, 99]
bounds['x2'] = ['grid', (0.0625, 0.125, 0.1875, 0.25, 0.3125,
                         0.375, 0.4375, 0.5, 0.5625, 0.625)]
bounds['x3'] = ['float', 10, 200]
bounds['x4'] = ['float', 10, 200]

# ---------------------------------
# PPO
# ---------------------------------
env = CreateEnvironment(method='ppo', fit=Vessel, bounds=bounds,
                        mode='min', episode_length=50)
cb = RLLogger(check_freq=1, mode='min')
ppo = PPO2(policy=MlpPolicy, env=env, n_steps=20, seed=1)
ppo.learn(total_timesteps=1000, callback=cb)
print('--------------- PPO results ---------------')
print('The best value of x found:', cb.xbest)
print('The best value of y found:', cb.rbest)
# logged best reward must agree with re-evaluating the logged best x
assert Vessel(cb.xbest) - cb.rbest < 1e-3

# ---------------------------------
# A2C
# ---------------------------------
cb = RLLogger(check_freq=1, mode='min')