import numpy as np
from nose.tools import assert_equal, assert_false, assert_true
from numpy.testing import assert_array_equal, assert_array_almost_equal
from bolero.environment import OptimumTrajectory


def test_penalize_velocity():
    env = OptimumTrajectory(x0=np.zeros(1), g=np.zeros(1), dt=1.0,
                            penalty_vel=6.0)
    env.init()
    n_outputs = env.get_num_outputs()
    n_inputs = env.get_num_inputs()
    xva = np.empty(n_outputs)

    env.reset()
    assert_false(env.is_evaluation_done())
    env.get_outputs(xva)
    # move with a constant velocity of 1
    xva = np.array([0.0, 1.0, 0.0])
    env.set_inputs(xva)
    env.step_action()
    assert_false(env.is_evaluation_done())
    env.get_outputs(xva)
    xva = np.array([0.0, 1.0, 0.0])
    env.set_inputs(xva)
    env.step_action()
    assert_true(env.is_evaluation_done())
    # both steps are penalized for the nonzero velocity
    rewards = env.get_feedback()
    assert_array_almost_equal(rewards, -6.0 * np.ones(2))


def test_dimensions():
    env = OptimumTrajectory(x0=np.zeros(3), g=np.ones(3))
    env.init()
    n_outputs = env.get_num_outputs()
    n_inputs = env.get_num_inputs()
    assert_equal(n_outputs, n_inputs)


def test_penalize_start():
    env = OptimumTrajectory(x0=np.zeros(1), g=np.ones(1), dt=1.0,
                            penalty_start_dist=4.0)
    env.init()
    n_outputs = env.get_num_outputs()
    n_inputs = env.get_num_inputs()
    xva = np.empty(n_outputs)

    env.reset()
    assert_false(env.is_evaluation_done())
    env.get_outputs(xva)
    # jump away from the start position immediately
    xva[:] = 1.0
    env.set_inputs(xva)
    env.step_action()
    assert_false(env.is_evaluation_done())
    env.get_outputs(xva)
    assert_array_equal(xva, np.ones(3))
    env.set_inputs(xva)
    env.step_action()
    assert_true(env.is_evaluation_done())
    # only the first step is penalized for the distance to the start
    rewards = env.get_feedback()
    assert_array_almost_equal(rewards, np.array([-4.0, 0.0]))


def test_penalize_obstacles():
    obstacles = np.array([[0.0, 0.0]])
    env = OptimumTrajectory(dt=1.0, penalty_obstacle=8.0, obstacles=obstacles)
    env.init()
    n_outputs = env.get_num_outputs()
    n_inputs = env.get_num_inputs()
    xva = np.empty(n_outputs)

    env.reset()
    assert_false(env.is_evaluation_done())
    env.get_outputs(xva)
    env.set_inputs(xva)
    env.step_action()
    assert_false(env.is_evaluation_done())
    env.get_outputs(xva)
    env.set_inputs(xva)
    env.step_action()
    assert_true(env.is_evaluation_done())
    # staying on the obstacle is penalized in both steps
    rewards = env.get_feedback()
    assert_array_almost_equal(rewards, -8.0 * np.ones(2))
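# The tests above all drive OptimumTrajectory through the same protocol:
# reset, then alternate get_outputs / set_inputs / step_action until
# is_evaluation_done() returns True, and finally collect per-step rewards
# with get_feedback(). A minimal stand-alone sketch of that loop, using a
# dummy "policy" that echoes the outputs back as inputs; the parameter
# values here are illustrative, not taken from the tests.
import numpy as np
from bolero.environment import OptimumTrajectory

env = OptimumTrajectory(x0=np.zeros(2), g=np.ones(2), dt=0.1,
                        penalty_goal_dist=1.0)
env.init()
xva = np.empty(env.get_num_outputs())  # positions, velocities, accelerations

env.reset()
while not env.is_evaluation_done():
    env.get_outputs(xva)   # read the current state
    env.set_inputs(xva)    # a real behavior would write its action here
    env.step_action()
rewards = env.get_feedback()  # one reward per step
print("sum of rewards: %g" % np.sum(rewards))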
# NOTE: this fragment of the example begins mid-call in the original;
# ProMPBehavior is assumed here because learn_covariance and use_covar
# are covariance-related behavior arguments, and x0, g, execution_time,
# dt, obstacles and n_episodes are defined earlier in the full script.
beh = ProMPBehavior(execution_time, dt, n_features,
                    learn_covariance=True, use_covar=True)
# initialize with a linear movement as initial guess
beh.init(4, 4)
beh.set_meta_parameters(["g", "x0"], [g, x0])
beh.imitate(np.tile(np.linspace(0, 1, 101), 2).reshape((2, 101, -1)))

env = OptimumTrajectory(x0, g, execution_time, dt, obstacles,
                        penalty_goal_dist=10000.0,
                        penalty_start_dist=10000.0,
                        penalty_obstacle=1000.0, penalty_length=10.0,
                        hide_acc_from_interface=True, use_covar=True)
opt = CMAESOptimizer(variance=0.1 ** 2, random_state=0,
                     initial_params=beh.get_params())
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs,
                        n_episodes=n_episodes, record_inputs=True)

rewards = controller.learn(["x0", "g"], [x0, g])
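# As in the DMP example below, the learned behavior can be replayed once
# more to inspect the trajectory it produces; this follow-up only reuses
# calls that appear in that example and is a sketch, not part of the
# original fragment.
controller.episode_with(bs.get_best_behavior(), ["x0", "g"], [x0, g])
X = np.asarray(controller.inputs_[-1])   # trajectory of the best behavior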
import numpy as np
from bolero.environment import OptimumTrajectory
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from bolero.representation import DMPBehavior
from bolero.controller import Controller


n_task_dims = 2
obstacles = [np.array([0.5, 0.5]), np.array([0.6, 0.8]), np.array([0.8, 0.6])]
x0 = np.zeros(n_task_dims)
g = np.ones(n_task_dims)
execution_time = 1.0
dt = 0.01
n_features = 10
n_episodes = 500

beh = DMPBehavior(execution_time, dt, n_features)
env = OptimumTrajectory(x0, g, execution_time, dt, obstacles,
                        penalty_goal_dist=1.0, penalty_obstacle=1000.0,
                        penalty_acc=1.0)
opt = CMAESOptimizer(variance=100.0 ** 2, random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs,
                        n_episodes=n_episodes, record_inputs=True)

rewards = controller.learn(["x0", "g"], [x0, g])
controller.episode_with(bs.get_best_behavior(), ["x0", "g"], [x0, g])
# trajectory of the best rollout and all trajectories seen during learning
X = np.asarray(controller.inputs_[-1])
X_hist = np.asarray(controller.inputs_)
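# A possible way to visualize the result; this sketch assumes matplotlib is
# available and that the recorded inputs are laid out as
# [positions, velocities, accelerations], so columns 0 and 1 hold the two
# task-space positions.
import matplotlib.pyplot as plt

plt.figure()
ax = plt.subplot(111, aspect="equal")
# all trajectories recorded during learning (gray) and the best rollout (black)
ax.scatter(X_hist[:, :, 0], X_hist[:, :, 1], s=1, c="gray")
ax.plot(X[:, 0], X[:, 1], lw=3, c="black")
# start, goal and obstacles
ax.scatter([x0[0]], [x0[1]], c="g", label="start")
ax.scatter([g[0]], [g[1]], c="b", label="goal")
obs = np.asarray(obstacles)
ax.scatter(obs[:, 0], obs[:, 1], c="r", marker="x", label="obstacles")
ax.legend(loc="best")
plt.show()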