Example #1
# Imports needed by the test snippets in Examples #1-#4 (assert helpers assumed
# to come from nose.tools and numpy.testing)
import numpy as np
from nose.tools import assert_false, assert_true, assert_equal
from numpy.testing import assert_array_almost_equal, assert_array_equal
from bolero.environment import OptimumTrajectory


def test_penalize_velocity():
    env = OptimumTrajectory(x0=np.zeros(1),
                            g=np.zeros(1),
                            dt=1.0,
                            penalty_vel=6.0)
    env.init()
    n_outputs = env.get_num_outputs()
    n_inputs = env.get_num_inputs()

    xva = np.empty(n_outputs)
    env.reset()

    assert_false(env.is_evaluation_done())
    env.get_outputs(xva)
    xva = np.array([0.0, 1.0, 0.0])  # position 0, velocity 1, acceleration 0
    env.set_inputs(xva)
    env.step_action()

    assert_false(env.is_evaluation_done())
    env.get_outputs(xva)
    xva = np.array([0.0, 1.0, 0.0])
    env.set_inputs(xva)
    env.step_action()

    assert_true(env.is_evaluation_done())
    rewards = env.get_feedback()
    assert_array_almost_equal(rewards, -6.0 * np.ones(2))  # velocity 1 in every step costs -penalty_vel per step
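The same poll-outputs / set-inputs / step loop recurs in the tests below. As a minimal sketch of that pattern, a generic episode driver for an already init()-ed bolero environment could look like the following; the helper name run_episode and the choose_inputs callback are illustrative and not part of the bolero API:

import numpy as np


def run_episode(env, choose_inputs):
    """Roll out one episode: poll outputs, choose inputs, step, collect feedback.

    choose_inputs is a hypothetical callback mapping the current outputs to the
    next inputs; it is not part of bolero itself.
    """
    env.reset()
    outputs = np.empty(env.get_num_outputs())
    inputs = np.empty(env.get_num_inputs())
    while not env.is_evaluation_done():
        env.get_outputs(outputs)            # read the current state
        inputs[:] = choose_inputs(outputs)  # decide on the next command
        env.set_inputs(inputs)
        env.step_action()                   # advance the environment by one dt
    return env.get_feedback()               # per-step rewards of the episode

# e.g. run_episode(env, lambda outputs: outputs) echoes the observed state back
# as the command, which is exactly what test_penalize_obstacles below does.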
Example #2
def test_dimensions():
    env = OptimumTrajectory(x0=np.zeros(3), g=np.ones(3))
    env.init()
    n_outputs = env.get_num_outputs()
    n_inputs = env.get_num_inputs()
    assert_equal(n_outputs, n_inputs)
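Judging from the one-dimensional tests in this listing (length-3 xva vectors for a single task dimension), the interface appears to expose position, velocity and acceleration per task dimension, so both counts should be three times the number of task dimensions. A small check under that assumption:

import numpy as np
from bolero.environment import OptimumTrajectory

env = OptimumTrajectory(x0=np.zeros(3), g=np.ones(3))
env.init()
# assumed layout: position, velocity and acceleration for each task dimension
assert env.get_num_outputs() == 3 * 3
assert env.get_num_inputs() == env.get_num_outputs()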
Example #3
def test_penalize_start():
    env = OptimumTrajectory(x0=np.zeros(1),
                            g=np.ones(1),
                            dt=1.0,
                            penalty_start_dist=4.0)
    env.init()
    n_outputs = env.get_num_outputs()
    n_inputs = env.get_num_inputs()

    xva = np.empty(n_outputs)
    env.reset()

    assert_false(env.is_evaluation_done())
    env.get_outputs(xva)
    xva[:] = 1.0
    env.set_inputs(xva)
    env.step_action()

    assert_false(env.is_evaluation_done())
    env.get_outputs(xva)
    assert_array_equal(xva, np.ones(3))
    env.set_inputs(xva)
    env.step_action()

    assert_true(env.is_evaluation_done())
    rewards = env.get_feedback()
    assert_array_almost_equal(rewards, np.array([-4.0, 0.0]))  # only the first step is penalized for its distance to the start x0
Example #4
def test_penalize_obstacles():
    obstacles = np.array([[0.0, 0.0]])  # obstacle at the origin, where the system starts and stays in this test
    env = OptimumTrajectory(dt=1.0, penalty_obstacle=8.0, obstacles=obstacles)
    env.init()
    n_outputs = env.get_num_outputs()
    n_inputs = env.get_num_inputs()

    xva = np.empty(n_outputs)
    env.reset()

    assert_false(env.is_evaluation_done())
    env.get_outputs(xva)
    env.set_inputs(xva)
    env.step_action()

    assert_false(env.is_evaluation_done())
    env.get_outputs(xva)
    env.set_inputs(xva)
    env.step_action()

    assert_true(env.is_evaluation_done())
    rewards = env.get_feedback()
    assert_array_almost_equal(rewards, -8.0 * np.ones(2))  # the system never leaves the obstacle, so each step costs -penalty_obstacle
Example #5
import numpy as np

from bolero.environment import OptimumTrajectory
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from bolero.representation import ProMPBehavior
from bolero.controller import Controller

n_task_dims = 2
obstacles = [np.array([0.5, 0.5]), np.array([0.6, 0.8]), np.array([0.8, 0.6])]
x0 = np.zeros(n_task_dims)
g = np.ones(n_task_dims)
execution_time = 1.0
dt = 0.01
n_features = 10
n_episodes = 500

# Reconstructed setup: the original snippet starts mid-call; the values above
# match the companion DMP example below, and the ProMPBehavior class name is
# assumed from the learn_covariance/use_covar arguments.
beh = ProMPBehavior(execution_time,
                    dt,
                    n_features,
                    learn_covariance=True,
                    use_covar=True)

# initialize the ProMP by imitating a straight line from x0 to g as an initial guess
beh.init(4, 4)
beh.set_meta_parameters(["g", "x0"], [g, x0])
beh.imitate(np.tile(np.linspace(0, 1, 101), 2).reshape((2, 101, -1)))

env = OptimumTrajectory(x0,
                        g,
                        execution_time,
                        dt,
                        obstacles,
                        penalty_goal_dist=10000.0,
                        penalty_start_dist=10000.0,
                        penalty_obstacle=1000.0,
                        penalty_length=10.,
                        hide_acc_from_interface=True,
                        use_covar=True)
opt = CMAESOptimizer(variance=0.1**2,
                     random_state=0,
                     initial_params=beh.get_params())
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env,
                        behavior_search=bs,
                        n_episodes=n_episodes,
                        record_inputs=True)

rewards = controller.learn(["x0", "g"], [x0, g])
Example #6
# DMP-based variant of the obstacle-avoidance setup
import numpy as np

from bolero.environment import OptimumTrajectory
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from bolero.representation import DMPBehavior
from bolero.controller import Controller

n_task_dims = 2
obstacles = [np.array([0.5, 0.5]), np.array([0.6, 0.8]), np.array([0.8, 0.6])]
x0 = np.zeros(n_task_dims)
g = np.ones(n_task_dims)
execution_time = 1.0
dt = 0.01
n_features = 10
n_episodes = 500

beh = DMPBehavior(execution_time, dt, n_features)
env = OptimumTrajectory(x0,
                        g,
                        execution_time,
                        dt,
                        obstacles,
                        penalty_goal_dist=1.0,
                        penalty_obstacle=1000.0,
                        penalty_acc=1.0)
opt = CMAESOptimizer(variance=100.0**2, random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env,
                        behavior_search=bs,
                        n_episodes=n_episodes,
                        record_inputs=True)

rewards = controller.learn(["x0", "g"], [x0, g])
controller.episode_with(bs.get_best_behavior(), ["x0", "g"], [x0, g])
X = np.asarray(controller.inputs_[-1])   # inputs recorded during the last episode (the best behavior)
X_hist = np.asarray(controller.inputs_)  # inputs recorded during all episodes
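Because record_inputs=True stores the environment inputs of every episode, the trajectory executed by the best behavior can be plotted against the obstacles. A minimal sketch, assuming (as in the tests above) that the first two columns of X are the x/y positions:

import matplotlib.pyplot as plt

plt.figure()
# first two columns of the recorded inputs: x/y positions of the trajectory
plt.plot(X[:, 0], X[:, 1], label="best trajectory")
plt.scatter([x0[0], g[0]], [x0[1], g[1]], marker="x", label="start / goal")
for obstacle in obstacles:
    # the radius is arbitrary and only used for visualization
    plt.gca().add_patch(plt.Circle(obstacle, 0.1, color="r", alpha=0.5))
plt.legend()
plt.show()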