예제 #1
0
def experiment(alg, n_runs, n_iterations, ep_per_run, use_tensorflow):
    np.random.seed()

    # MDP
    mdp = ShipSteering()

    # Policy
    if use_tensorflow:
        tensor_list = gaussian_tensor.generate(
            [3, 3, 6, 2], [[0., 150.], [0., 150.], [-np.pi, np.pi],
                           [-np.pi / 12, np.pi / 12]])

        phi = Features(tensor_list=tensor_list,
                       name='phi',
                       input_dim=mdp.info.observation_space.shape[0])
    else:
        basis = GaussianRBF.generate([3, 3, 6, 2],
                                     [[0., 150.], [0., 150.], [-np.pi, np.pi],
                                      [-np.pi / 12, np.pi / 12]])

        phi = Features(basis_list=basis)

    input_shape = (phi.size, )

    approximator_params = dict(input_dim=phi.size)
    approximator = Regressor(LinearApproximator,
                             input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape,
                             params=approximator_params)

    sigma = np.array([[.05]])
    policy = MultivariateGaussianPolicy(mu=approximator, sigma=sigma)

    # Agent
    learning_rate = AdaptiveParameter(value=.01)
    algorithm_params = dict(learning_rate=learning_rate)
    fit_params = dict()
    agent_params = {
        'algorithm_params': algorithm_params,
        'fit_params': fit_params
    }
    agent = alg(policy, mdp.info, agent_params, phi)

    # Train
    core = Core(agent, mdp)
    dataset_eval = core.evaluate(n_episodes=ep_per_run)
    J = compute_J(dataset_eval, gamma=mdp.info.gamma)
    print('J at start : ' + str(np.mean(J)))

    for i in xrange(n_runs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval = core.evaluate(n_episodes=ep_per_run)
        J = compute_J(dataset_eval, gamma=mdp.info.gamma)
        print('J at iteration ' + str(i) + ': ' + str(np.mean(J)))

    np.save('ship_steering.npy', dataset_eval)
def experiment(alg, params, experiment_params ,subdir, i):

    np.random.seed()

    # MDP
    mdp = ShipSteering(small=True, n_steps_action=3)

    high = [150, 150, np.pi]
    low = [0, 0, -np.pi]
    n_tiles = [5, 5, 6]
    low = np.array(low, dtype=np.float)
    high = np.array(high, dtype=np.float)
    n_tilings = 1

    tilings = Tiles.generate(n_tilings=n_tilings, n_tiles=n_tiles, low=low,
                             high=high)

    phi = Features(tilings=tilings)
    input_shape = (phi.size,)

    approximator_params = dict(input_dim=phi.size)
    approximator = Regressor(LinearApproximator, input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape,
                             params=approximator_params)

    #sigma = np.array([[1e-4]])
    std = np.array([3e-2])
    policy = DiagonalGaussianPolicy(mu=approximator, std=std)
    #policy = GaussianPolicy(mu=approximator, sigma=sigma)

    # Agent
    agent = alg(policy, mdp.info, features=phi, **params)

    # Train
    parameter_dataset = CollectPolicyParameter(policy)
    core = Core(agent, mdp, callbacks=[parameter_dataset])


    dataset_eval = list()
    dataset_eval_run = core.evaluate(n_episodes=ep_per_run)
    # print('distribution parameters: ', distribution.get_parameters())
    J = compute_J(dataset_eval_run, gamma=mdp.info.gamma)
    dataset_eval += dataset_eval_run
    print('J at start : ' + str(np.mean(J)))

    for n in range(n_runs):
        print('ITERATION    :', n)
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval_run = core.evaluate(n_episodes=ep_per_run)
        J = compute_J(dataset_eval_run, gamma=mdp.info.gamma)
        print('J at iteration ' + str(n) + ': ' + str(np.mean(J)))
        dataset_eval += dataset_eval_run

    mk_dir_recursive('./' + subdir + str(i))
    np.save(subdir+str(i)+'/dataset_eval_file', dataset_eval)
    np.save(subdir+str(i)+'/parameter_dataset_file', parameter_dataset)
예제 #3
0
def experiment(alg, params, subdir, exp_no):
    np.random.seed()

    # MDP
    mdp = ShipSteering(small=True, n_steps_action=3)

    high = [150, 150, np.pi]
    low = [0, 0, -np.pi]
    n_tiles = [5, 5, 6]
    low = np.array(low, dtype=np.float)
    high = np.array(high, dtype=np.float)
    n_tilings = 1

    tilings = Tiles.generate(n_tilings=n_tilings,
                             n_tiles=n_tiles,
                             low=low,
                             high=high)
    phi = Features(tilings=tilings)

    input_shape = (phi.size, )

    approximator_params = dict(input_dim=input_shape)
    approximator = Regressor(LinearApproximator,
                             input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape,
                             params=approximator_params)

    policy = DeterministicPolicy(mu=approximator)

    mu = np.zeros(policy.weights_size)
    sigma = 4e-1 * np.ones(policy.weights_size)
    distribution = GaussianDiagonalDistribution(mu, sigma)

    # Agent
    agent = alg(distribution, policy, mdp.info, features=phi, **params)

    # Train
    dataset_eval = list()
    core = Core(agent, mdp)
    dataset_eval_run = core.evaluate(n_episodes=ep_per_run)
    #print('distribution parameters: ', distribution.get_parameters())
    J = compute_J(dataset_eval_run, gamma=mdp.info.gamma)
    print('J at start : ' + str(np.mean(J)))
    dataset_eval += dataset_eval_run

    for n in range(n_runs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval_run = core.evaluate(n_episodes=ep_per_run)
        J = compute_J(dataset_eval_run, gamma=mdp.info.gamma)
        print('J at iteration ' + str(n) + ': ' + str(np.mean(J)))
        dataset_eval += dataset_eval_run

    mk_dir_recursive('./' + subdir + str(exp_no))
    np.save(subdir + str(exp_no) + '/dataset_eval_file', dataset_eval)
예제 #4
0
def experiment(alg, params, n_epochs, n_iterations, ep_per_run):
    np.random.seed()

    # MDP
    mdp = ShipSteering()

    # Policy
    high = [150, 150, np.pi]
    low = [0, 0, -np.pi]
    n_tiles = [5, 5, 6]
    low = np.array(low, dtype=np.float)
    high = np.array(high, dtype=np.float)
    n_tilings = 1

    tilings = Tiles.generate(n_tilings=n_tilings,
                             n_tiles=n_tiles,
                             low=low,
                             high=high)

    phi = Features(tilings=tilings)
    input_shape = (phi.size, )

    approximator = Regressor(LinearApproximator,
                             input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape)

    policy = DeterministicPolicy(approximator)

    mu = np.zeros(policy.weights_size)
    sigma = 4e-1 * np.ones(policy.weights_size)
    distribution = GaussianDiagonalDistribution(mu, sigma)

    # Agent
    agent = alg(distribution, policy, mdp.info, features=phi, **params)

    # Train
    print(alg.__name__)
    core = Core(agent, mdp)
    dataset_eval = core.evaluate(n_episodes=ep_per_run)
    J = compute_J(dataset_eval, gamma=mdp.info.gamma)
    print('J at start : ' + str(np.mean(J)))

    for i in range(n_epochs):
        core.learn(n_episodes=n_iterations * ep_per_run,
                   n_episodes_per_fit=ep_per_run)
        dataset_eval = core.evaluate(n_episodes=ep_per_run)
        J = compute_J(dataset_eval, gamma=mdp.info.gamma)
        print('J at iteration ' + str(i) + ': ' + str(np.mean(J)))
예제 #5
0
def build_approximator(mdp):
    high = [150, 150, np.pi]
    low = [0, 0, -np.pi]
    n_tiles = [5, 5, 8]
    low = np.array(low, dtype=np.float)
    high = np.array(high, dtype=np.float)
    n_tilings = 1

    tilings = Tiles.generate(n_tilings=n_tilings, n_tiles=n_tiles, low=low,
                             high=high, uniform=True)

    phi = Features(tilings=tilings)

    input_shape = (phi.size,)

    approximator = Regressor(LinearApproximator, input_shape=input_shape,
                             output_shape=mdp.info.action_space.shape)

    return phi, approximator
예제 #6
0
from mushroom.utils.parameters import Parameter, AdaptiveParameter

# Learning parameters
n_runs = 4
n_iterations = 10
ep_per_run = 3

# Environment
mdp = TurtlebotGazebo()

# Policy
tensor_list = gaussian_tensor.generate(
    [10, 10, 6], [[-5.0, 5.0], [-5.0, 5.0], [-np.pi, np.pi]])

phi = Features(tensor_list=tensor_list,
               name='phi',
               input_dim=mdp.info.observation_space.shape[0])

input_shape = (phi.size, )

approximator_params = dict(input_dim=phi.size)
approximator = Regressor(LinearApproximator,
                         input_shape=input_shape,
                         output_shape=mdp.info.action_space.shape,
                         params=approximator_params)

sigma = np.eye(2) * 1e-1
policy = MultivariateGaussianPolicy(mu=approximator, sigma=sigma)

# Agent
learning_rate = AdaptiveParameter(value=5)