def test_cmaes_get_best_params_best():
    opt = CMAESOptimizer()
    opt.init(10)
    params = np.empty(10)
    opt.get_next_parameters(params)
    opt.set_evaluation_feedback(np.array([0.0]))
    best_params = opt.get_best_parameters(method="best")
    assert_array_almost_equal(params, best_params)
def test_cmaes_stop_fitness_variance():
    opt = CMAESOptimizer(n_samples_per_update=5)
    opt.init(2)
    params = np.empty(2)
    it = 0
    while not opt.is_behavior_learning_done():
        opt.get_next_parameters(params)
        opt.set_evaluation_feedback([0.0])
        it += 1
    assert_equal(it, 6)
def test_record_feedbacks():
    opt = CMAESOptimizer(initial_params=np.zeros(2))
    ctrl = Controller(environment=ObjectiveFunction(),
                      behavior_search=JustOptimizer(opt),
                      record_feedbacks=True, accumulate_feedbacks=False)
    returns = ctrl.learn()
    assert_array_equal(returns, ctrl.feedbacks_)
def learn(name, setup_fun, run):
    ik, beh, mp_keys, mp_values = setup_fun(
        cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    env = ViaPointEnvironment(
        ik, cfg.x0, cfg.via_points, cfg.execution_time, cfg.dt,
        cfg.qlo, cfg.qhi, penalty_vel=cfg.penalty_vel,
        penalty_acc=cfg.penalty_acc,
        penalty_via_point=cfg.penalty_via_point)
    opt = CMAESOptimizer(variance=cfg.variance[name], random_state=run)
    bs = BlackBoxSearch(beh, opt)
    controller = Controller(environment=env, behavior_search=bs,
                            n_episodes=n_episodes, verbose=0)
    rewards = controller.learn(mp_keys, mp_values)
    best = bs.get_best_behavior()
    reward = controller.episode_with(best)
    return name, rewards, reward.sum()
def learn(setup_fun, variance):
    #ik, beh, mp_keys, mp_values = cfg.make_approx_cart_dmp(
    #    cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    #ik, beh, mp_keys, mp_values = cfg.make_exact_cart_dmp(
    #    cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    ik, beh, mp_keys, mp_values = cfg.make_joint_dmp(
        cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    env = Pendulum(x0=cfg.x0, g=cfg.g, execution_time=cfg.execution_time,
                   dt=cfg.dt)
    opt = CMAESOptimizer(variance=variance, random_state=0)
    bs = BlackBoxSearch(beh, opt)
    controller = Controller(environment=env, behavior_search=bs,
                            n_episodes=n_episodes, verbose=2)
    rewards = controller.learn(mp_keys, mp_values)
    best = bs.get_best_behavior()
    best_params = best.get_params()
    np.save("best_params_pendulum_joint.npy", best_params)
    reward = controller.episode_with(best)
    ax = env.plot()
    plt.show()
def test_noncontextual_behavior_search():
    opt = CMAESOptimizer(initial_params=np.zeros(1))
    assert_raises_regexp(
        TypeError, "requires contextual behavior search",
        ContextualController, environment=ContextualObjectiveFunction(),
        behavior_search=JustOptimizer(opt))
def test_cmaes_stop_conditioning():
    def objective(x):
        return -1e10 * x[1] ** 2

    opt = CMAESOptimizer(random_state=0)
    opt.init(2)
    params = np.empty(2)
    it = 0
    while not opt.is_behavior_learning_done():
        opt.get_next_parameters(params)
        opt.set_evaluation_feedback([objective(params)])
        it += 1
    assert_less(it, 600)
def test_learn_controller_cmaes_sphere():
    opt = CMAESOptimizer(initial_params=np.zeros(2), random_state=0)
    ctrl = Controller(environment=ObjectiveFunction(random_state=0),
                      behavior_search=JustOptimizer(opt), n_episodes=200)
    returns = ctrl.learn()
    dist_to_maximum = returns.max() - ctrl.environment.get_maximum_feedback()
    assert_greater(dist_to_maximum, -1e-5)
def test_record_inputs():
    opt = CMAESOptimizer(initial_params=np.zeros(2))
    ctrl = Controller(environment=ObjectiveFunction(),
                      behavior_search=JustOptimizer(opt), record_inputs=True)
    returns = ctrl.learn()
    assert_equal(len(returns), 10)
    assert_equal(np.array(ctrl.inputs_).shape, (10, 1, 2))
def test_record_test_results():
    opt = CMAESOptimizer(initial_params=np.zeros(2))
    ctrl = Controller(environment=ObjectiveFunction(),
                      behavior_search=JustOptimizer(opt),
                      n_episodes_before_test=10, n_episodes=100)
    ctrl.learn()
    results = np.array(ctrl.test_results_)
    assert_equal(results.shape[0], 10)
    assert_true(np.all(results[:-1] <= results[1:]))
def test_cmaes_respects_bounds():
    opt = CMAESOptimizer(bounds=[[-5, 4], [10, 20]], variance=10000.0,
                         random_state=0)
    opt.init(2)
    params = np.empty(2)
    opt.get_next_parameters(params)
    assert_true(np.all(params >= np.array([-5, 10])))
    assert_true(np.all(params <= np.array([4, 20])))
def learn(name, run, setup_fun, variance):
    ik, beh, mp_keys, mp_values = setup_fun(
        cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    env = Pendulum(x0=cfg.x0, g=cfg.g, execution_time=cfg.execution_time,
                   dt=cfg.dt)
    opt = CMAESOptimizer(variance=variance, random_state=run)
    bs = BlackBoxSearch(beh, opt)
    controller = Controller(environment=env, behavior_search=bs,
                            n_episodes=n_episodes, verbose=2)
    rewards = controller.learn(mp_keys, mp_values)
    best = bs.get_best_behavior()
    reward = controller.episode_with(best)
    return name, rewards, reward.sum()
def test_cmaes_no_initial_params():
    opt = CMAESOptimizer()
    opt.init(10)
    params = np.empty(10)
    opt.get_next_parameters(params)
def test_controller_cmaes_sphere():
    opt = CMAESOptimizer(initial_params=np.zeros(2))
    ctrl = Controller(environment=ObjectiveFunction(),
                      behavior_search=JustOptimizer(opt))
    returns = ctrl.learn()
    assert_equal(len(returns), 10)
import numpy as np
from bolero.wrapper import CppBLLoader
from bolero.controller import Controller
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from dmp_behavior import DMPBehavior
from first_feedback import FirstFeedback


if __name__ == "__main__":
    environment_name = "throwing_environment"
    bll = CppBLLoader()
    bll.load_library(environment_name)
    env = bll.acquire_contextual_environment(environment_name)
    env = FirstFeedback(env, random_state=0)
    env.request_context(np.array([1.5, 1.0]))

    beh = DMPBehavior(dt=0.01, execution_time=0.5, n_features=5)
    opt = CMAESOptimizer(variance=200.0 ** 2, active=True, random_state=0)
    bs = BlackBoxSearch(beh, opt)
    ctrl = Controller(environment=env, behavior_search=bs, n_episodes=200,
                      verbose=2)
    meta_params = [np.array([0.0, -0.8, -0.7]),
                   np.array([0.5, 0.5, 0.5]),
                   np.array([0.0, 0.5, 0.5])]
    print(ctrl.learn(["x0", "g", "gd"], meta_params))
import os
import numpy as np
import matplotlib.pyplot as plt
from throw_environment import ThrowEnvironment
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from bolero.controller import Controller
import throw_config as cfg


ik, beh, mp_keys, mp_values, initial_params, var = cfg.make_approx_cart_dmp(
    cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
#ik, beh, mp_keys, mp_values, initial_params, var = cfg.make_exact_cart_dmp(
#    cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
#ik, beh, mp_keys, mp_values, initial_params, var = cfg.make_joint_dmp(
#    cfg.x0, cfg.g, cfg.execution_time, cfg.dt)

env = ThrowEnvironment(start=cfg.x0j, random_state=0, verbose=1)
opt = CMAESOptimizer(initial_params=initial_params, variance=var,
                     random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs,
                        n_episodes=800, verbose=2)
rewards = controller.learn(mp_keys, mp_values)

best = bs.get_best_behavior()
reward = controller.episode_with(best)
print(reward.sum())

plt.plot(rewards)
plt.show()
beh.set_meta_parameters(["g", "x0"], [g, x0])
beh.imitate(np.tile(np.linspace(0, 1, 101), 2).reshape((2, 101, -1)))

env = OptimumTrajectory(
    x0, g, execution_time, dt, obstacles, penalty_goal_dist=10000.0,
    penalty_start_dist=10000.0, penalty_obstacle=1000.0, penalty_length=10.0,
    hide_acc_from_interface=True, use_covar=True)
opt = CMAESOptimizer(variance=0.1 ** 2, random_state=0,
                     initial_params=beh.get_params())
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs,
                        n_episodes=n_episodes, record_inputs=True)
rewards = controller.learn(["x0", "g"], [x0, g])
controller.episode_with(bs.get_best_behavior(), ["x0", "g"], [x0, g])
X = np.asarray(controller.inputs_[-1])
X_hist = np.asarray(controller.inputs_)

plt.figure(figsize=(8, 5))
ax = plt.subplot(121)
ax.set_title("Optimization progress")
to solve the problem and policy search algorithms usually work very well
in this domain.
"""
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from bolero.environment import OpenAiGym
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from bolero.representation import LinearBehavior
from bolero.controller import Controller


beh = LinearBehavior()
env = OpenAiGym("CartPole-v0", render=False, seed=0)
opt = CMAESOptimizer(variance=10.0 ** 2, random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs, n_episodes=300)

rewards = controller.learn()
controller.episode_with(bs.get_best_behavior())

plt.figure()
ax = plt.subplot(111)
ax.set_title("Optimization progress")
ax.plot(rewards)
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
ax.set_ylim(-10, 210)
plt.show()
e = Ellipse(xy=mean, width=width, height=height, angle=np.degrees(angle),
            ec=color, fc="none", lw=3, ls="dashed")
plt.gca().add_artist(e)

n_generations = 20
n_samples_per_update = 20
n_params = 2

for objective_name in ["Sphere", "SchaffersF7"]:
    objective = FUNCTIONS[objective_name](0, n_params)
    initial_params = 4.0 * np.ones(n_params)
    cmaes = CMAESOptimizer(
        initial_params=initial_params, variance=0.1, active=True,
        n_samples_per_update=n_samples_per_update,
        bounds=np.array([[-5, 5], [-5, 5]]), random_state=0)
    cmaes.init(n_params)

    n_rows = 4
    plt.figure(figsize=(n_generations * 3 / n_rows, 3 * n_rows))
    path = []
    for it in range(n_generations):
        plt.subplot(n_rows, int(n_generations / n_rows), it + 1)
        plot_objective()
        last_mean = cmaes.mean.copy()
        path.append(last_mean)
        last_cov = cmaes.var * cmaes.cov
        X = np.empty((n_samples_per_update, n_params))
        F = np.empty((n_samples_per_update, 1))
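        # The rest of the generation loop is not part of this excerpt. A
        # minimal sketch of how X and F could be filled with the optimizer
        # API used throughout this section (an assumption, not the original
        # code):
        for i in range(n_samples_per_update):
            cmaes.get_next_parameters(X[i])       # sample one candidate
            F[i, 0] = objective.feedback(X[i])    # evaluate it
            cmaes.set_evaluation_feedback(F[i])   # report result to CMA-ES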
def test_cmaes_dimensions_mismatch():
    opt = CMAESOptimizer(initial_params=np.zeros(5))
    assert_raises_regexp(ValueError, "Number of dimensions", opt.init, 10)
def test_cmaes_diagonal_cov():
    opt = CMAESOptimizer(covariance=np.zeros(10))
    opt.init(10)
    params = np.empty(10)
    opt.get_next_parameters(params)
def test_acmaes():
    x = np.zeros(n_dims)
    opt = CMAESOptimizer(x, active=True, random_state=0,
                         log_to_stdout=False)
    opt.init(n_dims)
    r = eval_loop(x, opt, n_dims)
    assert_greater(r.max(), -1e-5)
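# `eval_loop` is defined elsewhere in the test module. A plausible sketch,
# consistent with its call signature above and with the loop tail visible
# later in this section; the Sphere objective and the number of evaluations
# are assumptions, not taken from this excerpt:
def eval_loop(x, opt, n_dims, n_evals=1000):
    objective = FUNCTIONS["Sphere"](0, n_dims)  # assumed objective
    results = np.empty(n_evals)
    for i in range(n_evals):
        opt.get_next_parameters(x)
        results[i] = objective.feedback(x)
        opt.set_evaluation_feedback(results[i])
    # Offsets from the known optimum; max approaches 0 on success
    return results - objective.f_opt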
    cfg.via_points, cfg.execution_time, cfg.dt, cfg.qlo, cfg.qhi,
    penalty_vel=cfg.penalty_vel, penalty_acc=cfg.penalty_acc,
    penalty_via_point=cfg.penalty_via_point, log_to_stdout=True)

if os.path.exists("initial_params.txt"):
    initial_params = np.loadtxt("initial_params.txt")
else:
    initial_params = None
opt = CMAESOptimizer(initial_params=initial_params,
                     variance=cfg.variance["approxik"], random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs,
                        n_episodes=1000, verbose=2)
rewards = controller.learn(mp_keys, mp_values)

best = bs.get_best_behavior()
best_params = best.get_params()
np.save("best_params_viapoint_joint.npy", best_params)
reward = controller.episode_with(best)
print(reward.sum())

plt.plot(rewards)
        results[i] = objective.feedback(x)
        opt.set_evaluation_feedback(results[i])
    return results - objective.f_opt


n_dims = 2
n_iter = 800
x = np.zeros(n_dims)
optimizers = {
    "None": NoOptimizer(x),
    "Random": RandomOptimizer(x, random_state=0),
    "CMA-ES": CMAESOptimizer(x, bounds=np.array([[-5, 5]]), random_state=0),
    "aCMA-ES": CMAESOptimizer(x, bounds=np.array([[-5, 5]]), active=True,
                              random_state=0),
    "REPS": REPSOptimizer(x, random_state=0),
    "ACM-ES": ACMESOptimizer(x, random_state=0),
    "XNES": XNESOptimizer(x, random_state=0),
    "CEM": CEMOptimizer(x, random_state=0),
}

plt.figure(figsize=(12, 8))
plt.xlabel("Function evaluations")
plt.ylabel("$f(x)$")
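# The benchmark loop that consumes `optimizers` is not part of this excerpt.
# A minimal sketch, assuming each optimizer is evaluated on the same Sphere
# objective and the running best offset from the optimum is plotted per
# optimizer:
for name, opt in optimizers.items():
    objective = FUNCTIONS["Sphere"](0, n_dims)  # assumed objective
    opt.init(n_dims)
    results = np.empty(n_iter)
    for i in range(n_iter):
        opt.get_next_parameters(x)
        results[i] = objective.feedback(x)
        opt.set_evaluation_feedback(results[i])
    plt.plot(np.maximum.accumulate(results - objective.f_opt), label=name)
plt.legend(loc="lower right")
plt.show()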