Example 1
import numpy as np

# Missing imports, reconstructed from Example 4 where possible; the
# environment, approximator and Core paths are inferred from the mushroom
# library layout.
from mushroom.approximators import Regressor
from mushroom.approximators.parametric import LinearApproximator
from mushroom.core import Core
from mushroom.environments import ShipSteering
from mushroom.features.basis import GaussianRBF
from mushroom.features.features import Features
from mushroom.features.tensors import gaussian_tensor
from mushroom.policy import MultivariateGaussianPolicy
from mushroom.utils.dataset import compute_J
from mushroom.utils.parameters import AdaptiveParameter


def experiment(alg, n_runs, n_iterations, ep_per_run, use_tensorflow):
    # Reseed from OS entropy so that concurrent runs get different streams
    np.random.seed()

    # MDP
    mdp = ShipSteering()

    # Policy
    if use_tensorflow:
        tensor_list = gaussian_tensor.generate(
            [3, 3, 6, 2], [[0., 150.], [0., 150.], [-np.pi, np.pi],
                           [-np.pi / 12, np.pi / 12]])

        phi = Features(tensor_list=tensor_list,
                       name='phi',
                       input_dim=mdp.info.observation_space.shape[0])
    else:
        basis = GaussianRBF.generate([3, 3, 6, 2],
                                     [[0., 150.], [0., 150.], [-np.pi, np.pi],
                                      [-np.pi / 12, np.pi / 12]])

        phi = Features(basis_list=basis)

    input_shape = (phi.size, )

    approximator_params = dict(input_dim=phi.size)
    approximator = Regressor(LinearApproximator,
                             input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape,
                             params=approximator_params)

    sigma = np.array([[.05]])
    policy = MultivariateGaussianPolicy(mu=approximator, sigma=sigma)

    # Agent
    learning_rate = AdaptiveParameter(value=.01)
    algorithm_params = dict(learning_rate=learning_rate)
    fit_params = dict()
    agent_params = {
        'algorithm_params': algorithm_params,
        'fit_params': fit_params
    }
    agent = alg(policy, mdp.info, agent_params, phi)

    # Train
    core = Core(agent, mdp)
    dataset_eval = core.evaluate(n_episodes=ep_per_run)
    J = compute_J(dataset_eval, gamma=mdp.info.gamma)
    print('J at start: ' + str(np.mean(J)))

    for i in range(n_runs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval = core.evaluate(n_episodes=ep_per_run)
        J = compute_J(dataset_eval, gamma=mdp.info.gamma)
        print('J at iteration ' + str(i) + ': ' + str(np.mean(J)))

    np.save('ship_steering.npy', dataset_eval)
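
A minimal sketch of how `experiment` might be invoked, assuming `REINFORCE` from mushroom's policy-search module as the algorithm (any class with a compatible constructor would do; the run sizes are illustrative, not from the original snippet):

from mushroom.algorithms.policy_search import REINFORCE

experiment(alg=REINFORCE, n_runs=4, n_iterations=10, ep_per_run=3,
           use_tensorflow=False)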
Example 2
import numpy as np

# Missing imports, reconstructed from the other examples (the GaussianRBF
# path is inferred from the mushroom library layout)
from mushroom.features.basis import GaussianRBF
from mushroom.features.features import Features
from mushroom.features.tensors import gaussian_tensor


def basis_and_tensors():
    # The same 3 x 3 grid of Gaussians, built with the plain-numpy basis
    # backend and with the tensor-based (TensorFlow) backend
    basis_rbf = GaussianRBF.generate([3, 3], [[0., 1.], [-.5, .5]])
    tensor_rbf = gaussian_tensor.generate([3, 3], [[0., 1.], [-.5, .5]])
    features_1 = Features(tensor_list=tensor_rbf, name='rbf', input_dim=2)
    features_2 = Features(basis_list=basis_rbf)

    # Random samples inside the grids' support: [0, 1] x [-.5, .5]
    x = np.random.rand(10, 2) + [0., -.5]

    y_1 = features_1(x)
    y_2 = features_2(x)

    assert np.allclose(y_1, y_2)
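
Both backends place one Gaussian per point of the 3 x 3 grid of centers, so the test could also check the feature count explicitly (the expected size of 9 is my own arithmetic; `size` is the same attribute Example 1 uses):

    assert features_1.size == features_2.size == 9  # 3 x 3 grid of centers
    assert y_1.shape == (10, 9)                     # one feature row per sample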
Example 3
import numpy as np

# Missing imports, with the same paths as in Example 4
from mushroom.features.features import Features
from mushroom.features.tensors import gaussian_tensor


def tensor():
    rbf = gaussian_tensor.generate([3, 3], [[0., 1.], [-.5, .5]])
    features = Features(tensor_list=rbf, name='rbf', input_dim=2)

    x = np.random.rand(10, 2) + [0., -.5]

    y = features(x)

    # Evaluating sample-by-sample must match the corresponding batch rows
    for i, x_i in enumerate(x):
        assert np.allclose(features(x_i), y[i])

    # The state can also be passed split column-wise across separate arrays
    x_1 = x[:, 0].reshape(-1, 1)
    x_2 = x[:, 1].reshape(-1, 1)

    assert np.allclose(features(x_1, x_2), y)

    for i, x_i in enumerate(zip(x_1, x_2)):
        assert np.allclose(features(x_i[0], x_i[1]), y[i])
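
Taken together, the assertions above exercise the three call conventions the tensor-based `Features` object accepts: a whole batch, a single sample, and the state split column-wise across separate arrays (`features(x_1, x_2)`); all must yield the same feature values.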
Example 4
import numpy as np

# numpy and the approximator imports below are not in the original snippet;
# their paths are inferred from Example 1's usage of the mushroom library.
from mushroom.approximators import Regressor
from mushroom.approximators.parametric import LinearApproximator
from mushroom.features.features import Features
from mushroom.features.tensors import gaussian_tensor
from mushroom.policy import GaussianPolicy, MultivariateGaussianPolicy, \
    MultivariateDiagonalGaussianPolicy
from mushroom.utils.dataset import compute_J
from mushroom.utils.parameters import Parameter, AdaptiveParameter

# Learning parameters
n_runs = 4
n_iterations = 10
ep_per_run = 3

# Environment
# TurtlebotGazebo is a project-specific environment; its import is not part
# of the original snippet
mdp = TurtlebotGazebo()

# Policy
tensor_list = gaussian_tensor.generate(
    [10, 10, 6], [[-5.0, 5.0], [-5.0, 5.0], [-np.pi, np.pi]])

phi = Features(tensor_list=tensor_list,
               name='phi',
               input_dim=mdp.info.observation_space.shape[0])

input_shape = (phi.size, )

approximator_params = dict(input_dim=phi.size)
approximator = Regressor(LinearApproximator,
                         input_shape=input_shape,
                         output_shape=mdp.info.action_space.shape,
                         params=approximator_params)

sigma = np.eye(2) * 1e-1
policy = MultivariateGaussianPolicy(mu=approximator, sigma=sigma)
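
The snippet stops after the policy is built. A sketch of the remaining training loop, following Example 1's pattern (the choice of REINFORCE, the agent_params layout, and the learning rate are assumptions carried over from that example, not part of this snippet):

from mushroom.algorithms.policy_search import REINFORCE
from mushroom.core import Core

# Assumed agent construction, mirroring Example 1
learning_rate = AdaptiveParameter(value=.01)
agent_params = {'algorithm_params': dict(learning_rate=learning_rate),
                'fit_params': dict()}
agent = REINFORCE(policy, mdp.info, agent_params, phi)

core = Core(agent, mdp)
for i in range(n_runs):
    core.learn(n_episodes=n_iterations * ep_per_run,
               n_episodes_per_fit=ep_per_run)
    dataset_eval = core.evaluate(n_episodes=ep_per_run)
    J = compute_J(dataset_eval, gamma=mdp.info.gamma)
    print('J at iteration ' + str(i) + ': ' + str(np.mean(J)))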