예제 #1
0
def test_acer():
    """Run a short ACER optimisation on a 5-variable integer Sphere problem.

    Builds the bounds dictionary, wraps the objective in an environment via
    ``CreateEnvironment``, trains an ``ACER`` agent for 2000 timesteps with
    an ``RLLogger`` callback, and prints the ``xbest`` / ``rbest`` values
    held by the callback.
    """

    def Sphere(individual):
        """Sphere test objective function.

        F(x) = sum_{i=1}^d xi^2
        d=1,2,3,...
        Range: [-100,100]
        Minima: 0
        """
        return sum(value ** 2 for value in individual)

    # Five integer variables named x1..x5, each bounded to [-100, 100].
    n_vars = 5
    bounds = {'x' + str(k): ['int', -100, 100] for k in range(1, n_vars + 1)}

    # Wrap the objective and its bounds in an environment object.
    environment = CreateEnvironment(
        method='acer',
        fit=Sphere,
        bounds=bounds,
        mode='min',
        episode_length=50,
    )
    # Callback used to log data during learning.
    logger = RLLogger(check_freq=1, mode='min')
    # ACER agent built on top of the environment.
    agent = ACER(MlpPolicy, env=environment, n_steps=25, q_coef=0.55, ent_coef=0.02)
    # Run the optimisation.
    agent.learn(total_timesteps=2000, callback=logger)

    # Report the best point recorded by the callback.
    print('--------------- ACER results ---------------')
    print('The best value of x found:', logger.xbest)
    print('The best value of y found:', logger.rbest)
예제 #2
0
파일: test_acer.py 프로젝트: XuboGU/neorl
def test_acer():
    """Train an ACER agent on an ``IntegerSphere`` environment and print the best result.

    The agent learns for 2000 timesteps while an ``RLLogger`` callback
    records data; the callback's ``xbest`` and ``rbest`` are printed.
    """
    # Environment instance to be optimised.
    environment = IntegerSphere()
    # Callback used to log data during learning.
    logger = RLLogger(check_freq=1)
    # ACER agent built on top of the environment.
    agent = ACER(MlpPolicy, env=environment, n_steps=25, q_coef=0.55, ent_coef=0.02)
    # Run the optimisation.
    agent.learn(total_timesteps=2000, callback=logger)

    # Report the best point recorded by the callback.
    print('--------------- ACER results ---------------')
    print('The best value of x found:', logger.xbest)
    print('The best value of y found:', logger.rbest)
예제 #3
0
def test_ppo():
    """Train a PPO2 agent on a ``Sphere`` environment and print the best result.

    The agent learns for 2000 timesteps while an ``RLLogger`` callback
    records data; the callback's ``xbest`` and ``rbest`` are printed.
    """
    # Environment instance to be optimised.
    environment = Sphere()
    # Callback used to log data during learning.
    logger = RLLogger(check_freq=1)
    # PPO2 agent built on top of the environment.
    agent = PPO2(MlpPolicy, env=environment, n_steps=12)
    # Run the optimisation.
    agent.learn(total_timesteps=2000, callback=logger)

    # Report the best point recorded by the callback.
    print('--------------- PPO results ---------------')
    print('The best value of x found:', logger.xbest)
    print('The best value of y found:', logger.rbest)
예제 #4
0
def test_dqn():
    """Train a DQN agent on an ``IntegerSphere`` environment and print the best result.

    The agent learns for 2000 timesteps while an ``RLLogger`` callback
    records data; the callback's ``xbest`` and ``rbest`` are printed.
    """
    # Environment instance to be optimised.
    environment = IntegerSphere()
    # Callback used to log data during learning.
    logger = RLLogger(check_freq=1)
    # DQN agent built on top of the environment.
    agent = DQN(DQNPolicy, env=environment)
    # Run the optimisation.
    agent.learn(total_timesteps=2000, callback=logger)

    # Report the best point recorded by the callback.
    print('--------------- DQN results ---------------')
    print('The best value of x found:', logger.xbest)
    print('The best value of y found:', logger.rbest)
예제 #5
0
                    F(x) = sum_{i=1}^d xi^2
                    d=1,2,3,...
                    Range: [-100,100]
                    Minima: 0
            """
        #-1 is used to convert minimization to maximization
        return -sum(x**2 for x in individual)

    def reset(self):
        """Clear the ``done`` flag and return a sample drawn from ``action_space``."""
        self.done = False
        initial_sample = self.action_space.sample()
        return initial_sample

    def render(self, mode='human'):
        """Rendering hook; does nothing and returns ``None`` regardless of ``mode``."""
        return None


#--------------------------------------------------------
# RL Optimisation
#--------------------------------------------------------
# Instantiate the Sphere environment class.
env = Sphere()
# Create a callback object used to log data during learning (check_freq=1).
cb = RLLogger(check_freq=1)
# Create an ACKTR agent bound to the environment, using MlpPolicy and n_steps=12.
acktr = ACKTR(MlpPolicy, env=env, n_steps=12)
# Run the optimisation for 2500 timesteps, passing the logger as the callback.
acktr.learn(total_timesteps=2500, callback=cb)
# Print the xbest / rbest values held by the callback after learning.
print('--------------- ACKTR results ---------------')
print('The best value of x found:', cb.xbest)
print('The best value of y found:', cb.rbest)
예제 #6
0
파일: ex_acktr.py 프로젝트: mradaideh/neorl
def Sphere(individual):
    """Evaluate the Sphere test objective function.

    F(x) = sum_{i=1}^d xi^2
    d=1,2,3,...
    Range: [-100,100]
    Minima: 0

    :param individual: iterable of numeric coordinates
    :return: sum of the squared coordinates (0 for an empty iterable)
    """
    squared_terms = [coordinate ** 2 for coordinate in individual]
    return sum(squared_terms)


# Number of decision variables.
nx = 5
# Bounds dictionary: one entry per variable x1..x5, each ['float', -10, 10].
bounds = {}
for i in range(1, nx + 1):
    bounds['x' + str(i)] = ['float', -10, 10]

# Build an environment from the Sphere objective and the bounds
# (method='acktr', mode='min', episode_length=50).
env = CreateEnvironment(method='acktr',
                        fit=Sphere,
                        bounds=bounds,
                        mode='min',
                        episode_length=50)
# Create a callback object used to log data during learning (mode='min').
cb = RLLogger(check_freq=1, mode='min')
# Create an ACKTR agent bound to the environment; seed=1 is passed to the constructor.
acktr = ACKTR(MlpPolicy, env=env, n_steps=12, seed=1)
# Run the optimisation for 2000 timesteps, passing the logger as the callback.
acktr.learn(total_timesteps=2000, callback=cb)
# Print the xbest / rbest values held by the callback after learning.
print('--------------- ACKTR results ---------------')
print('The best value of x found:', cb.xbest)
print('The best value of y found:', cb.rbest)