Esempio n. 1
0
#!/usr/bin/env python
import numpy as np

from rltoolbox.algorithm.cmac import *
from rltoolbox.environment.continuous import BallBeam
from rltoolbox.approximator import CMACApproximator
from rltoolbox.misc import compare_learning_curves, plot_learning_stats

if __name__ == "__main__":

    algorithm = CMACAHC
    environment = BallBeam(max_steps=10000,
                           state_variables_ranges=[[-0.6, -0.2, 0.2, 0.6],
                                                   [-0.6, -0.2, 0.2, 0.6]])
    environment.approximate_with(CMACApproximator, n_layers=4)
    n_episodes = 10
    n_repeats = 10

    book_parameters = {
        'alpha': 0.005 / 4,
        'beta': 0.005 / 400,
        'epsilon': 0.1,
        'lambd': 0.5,
        'gamma': 0.995
    }

    learning_curves = np.zeros((n_repeats, n_episodes))
    for i in range(n_repeats):
        alg_instance = algorithm(environment, **book_parameters)
        alg_instance.learn(n_episodes, render=False)
        learning_curves[i, :] = alg_instance.steps_per_episode
Esempio n. 2
0
#!/usr/bin/env python
import numpy as np

from rltoolbox.algorithm.cmac import *
from rltoolbox.environment.continuous import BallBeam
from rltoolbox.approximator import CMACApproximator
from rltoolbox.misc import compare_learning_curves, plot_learning_stats

if __name__ == "__main__":

    algorithm = CMACSARSA
    environment = BallBeam(max_steps=10000)
    environment.approximate_with(CMACApproximator, n_layers=4)
    n_episodes = 10
    n_repeats = 5

    book_parameters = {
        'alpha': 1,
        'gamma': 0.995,
        'epsilon': 0.1,
        'lambd': 0.0
    }

    learning_curves = np.zeros((n_repeats, n_episodes))
    for i in range(n_repeats):
        alg_instance = algorithm(environment, **book_parameters)
        alg_instance.learn(n_episodes, render=False)
        learning_curves[i, :] = alg_instance.steps_per_episode

    plot_learning_stats(learning_curves,
                        'BallBeam CMACSARSA learning',
Esempio n. 3
0
#!/usr/bin/env python
import numpy as np

from rltoolbox.algorithm.fuzzy import *
from rltoolbox.environment.continuous import BallBeam
from rltoolbox.approximator import FuzzyApproximator
from rltoolbox.misc import compare_learning_curves, plot_learning_stats

if __name__ == "__main__":

    algorithm = FQ
    environment = BallBeam(max_steps=10000)
    environment.approximate_with(FuzzyApproximator)
    n_episodes = 5
    n_repeats = 5

    book_parameters = {
        'alpha': 0.1,
        'epsilon': 0.1,
        'gamma': 0.995,
        'lambd': 0.5
    }

    learning_curves = np.zeros((n_repeats, n_episodes))
    for i in range(n_repeats):
        alg_instance = algorithm(environment, **book_parameters)
        alg_instance.learn(n_episodes, render=False)
        learning_curves[i, :] = alg_instance.steps_per_episode

    plot_learning_stats(learning_curves, 'BallBeam FQ learning', savefig=False)