def learn(setup_fun, variance):
    #ik, beh, mp_keys, mp_values = cfg.make_approx_cart_dmp(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    #ik, beh, mp_keys, mp_values = cfg.make_exact_cart_dmp(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    ik, beh, mp_keys, mp_values = cfg.make_joint_dmp(
        cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    env = Pendulum(x0=cfg.x0, g=cfg.g, execution_time=cfg.execution_time,
                   dt=cfg.dt)
    opt = CMAESOptimizer(variance=variance, random_state=0)
    bs = BlackBoxSearch(beh, opt)
    controller = Controller(environment=env, behavior_search=bs,
                            n_episodes=n_episodes, verbose=2)
    rewards = controller.learn(mp_keys, mp_values)
    best = bs.get_best_behavior()
    best_params = best.get_params()
    np.save("best_params_pendulum_joint.npy", best_params)
    reward = controller.episode_with(best)
    ax = env.plot()
    plt.show()
def test_black_box_search_from_dicts():
    beh = {"type": "bolero.representation.ConstantBehavior"}
    opt = {"type": "bolero.optimizer.NoOptimizer"}
    bs = BlackBoxSearch(beh, opt)
    bs.init(5, 5)
    # NoOptimizer should be initialized with the parameters from the behavior
    assert_array_equal(bs.behavior.get_params(), bs.optimizer.initial_params)
def learn(name, setup_fun, run):
    ik, beh, mp_keys, mp_values = setup_fun(
        cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    env = ViaPointEnvironment(
        ik, cfg.x0, cfg.via_points, cfg.execution_time, cfg.dt,
        cfg.qlo, cfg.qhi, penalty_vel=cfg.penalty_vel,
        penalty_acc=cfg.penalty_acc,
        penalty_via_point=cfg.penalty_via_point)
    opt = CMAESOptimizer(variance=cfg.variance[name], random_state=run)
    bs = BlackBoxSearch(beh, opt)
    controller = Controller(environment=env, behavior_search=bs,
                            n_episodes=n_episodes, verbose=0)
    rewards = controller.learn(mp_keys, mp_values)
    best = bs.get_best_behavior()
    reward = controller.episode_with(best)
    return name, rewards, reward.sum()
def test_black_box_search_requires_optimizer():
    class NoOptimizerSubclass(object):
        pass

    bs = BlackBoxSearch(ConstantBehavior(), NoOptimizerSubclass())
    assert_raises_regexp(TypeError, "expects instance of Optimizer",
                         bs.init, 5, 5)
def learn(name, run, setup_fun, variance):
    ik, beh, mp_keys, mp_values = setup_fun(
        cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    env = Pendulum(
        x0=cfg.x0, g=cfg.g, execution_time=cfg.execution_time, dt=cfg.dt)
    opt = CMAESOptimizer(variance=variance, random_state=run)
    bs = BlackBoxSearch(beh, opt)
    controller = Controller(environment=env, behavior_search=bs,
                            n_episodes=n_episodes, verbose=2)
    rewards = controller.learn(mp_keys, mp_values)
    best = bs.get_best_behavior()
    reward = controller.episode_with(best)
    return name, rewards, reward.sum()
def test_save_black_box_search():
    bs = BlackBoxSearch(ConstantBehavior(), NoOptimizer())
    bs.init(5, 5)
    assert_pickle("BlackBoxSearch", bs)

    path = "." + os.sep
    bs.write_results(path)
    bs.get_behavior_from_results(path)
    filename = path + "BlackBoxSearch.pickle"
    assert_true(os.path.exists(filename))
    if os.path.exists(filename):
        os.remove(filename)
def test_black_box_search_protocol():
    n_inputs, n_outputs = 5, 5
    bs = BlackBoxSearch(ConstantBehavior(), NoOptimizer())
    bs.init(n_inputs, n_outputs)
    assert_false(bs.is_behavior_learning_done())
    beh = bs.get_next_behavior()
    inputs = np.zeros(n_inputs)
    beh.set_inputs(inputs)
    outputs = np.empty(n_outputs)
    beh.get_outputs(outputs)
    bs.set_evaluation_feedback(np.array([0.0]))
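# A minimal sketch (not from the original source) of the per-episode loop that
# Controller automates on top of the BehaviorSearch protocol exercised above:
# request a behavior, execute it, and report the accumulated reward back.
# It reuses ConstantBehavior and NoOptimizer from the test, so no parameters
# are actually improved; a real setup would plug in e.g. CMAESOptimizer.
import numpy as np
from bolero.behavior_search import BlackBoxSearch
from bolero.representation import ConstantBehavior
from bolero.optimizer import NoOptimizer

n_inputs, n_outputs = 5, 5
bs = BlackBoxSearch(ConstantBehavior(), NoOptimizer())
bs.init(n_inputs, n_outputs)
for _ in range(3):  # three dummy episodes
    beh = bs.get_next_behavior()
    beh.set_inputs(np.zeros(n_inputs))  # observations would come from the environment
    outputs = np.empty(n_outputs)
    beh.get_outputs(outputs)            # actions would be sent to the environment
    bs.set_evaluation_feedback(np.array([0.0]))  # reward collected during the episode
best = bs.get_best_behavior()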
    cfg.dt, cfg.qlo, cfg.qhi, penalty_vel=cfg.penalty_vel,
    penalty_acc=cfg.penalty_acc, penalty_via_point=cfg.penalty_via_point,
    log_to_stdout=True)

# Warm-start CMA-ES from previously stored parameters if they are available
if os.path.exists("initial_params.txt"):
    initial_params = np.loadtxt("initial_params.txt")
else:
    initial_params = None
opt = CMAESOptimizer(initial_params=initial_params,
                     variance=cfg.variance["approxik"], random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs,
                        n_episodes=1000, verbose=2)
rewards = controller.learn(mp_keys, mp_values)
best = bs.get_best_behavior()
best_params = best.get_params()
np.save("best_params_viapoint_joint.npy", best_params)
reward = controller.episode_with(best)
print(reward.sum())

plt.plot(rewards)
ax = env.plot()
ax.view_init(azim=-110, elev=30)
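# A minimal sketch (an assumption, not part of the original script) of how the
# parameters saved above could be reused in a later run, given the objects
# defined above (cfg, beh, controller): warm-start a new CMA-ES search via
# initial_params, or write them back into the behavior and replay an episode.
# It assumes the behavior's set_params accepts the stored parameter vector.
saved_params = np.load("best_params_viapoint_joint.npy")

# Option 1: warm-start a fresh optimizer from the stored solution
opt = CMAESOptimizer(initial_params=saved_params,
                     variance=cfg.variance["approxik"], random_state=1)

# Option 2: replay the stored behavior without further learning
beh.set_params(saved_params)
reward = controller.episode_with(beh)
print(reward.sum())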
to solve the problem and policy search algorithms usually work very well
in this domain.
"""
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from bolero.environment import OpenAiGym
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from bolero.representation import LinearBehavior
from bolero.controller import Controller


beh = LinearBehavior()
env = OpenAiGym("CartPole-v0", render=False, seed=0)
opt = CMAESOptimizer(variance=10.0 ** 2, random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs, n_episodes=300)
rewards = controller.learn()
controller.episode_with(bs.get_best_behavior())

plt.figure()
ax = plt.subplot(111)
ax.set_title("Optimization progress")
ax.plot(rewards)
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
ax.set_ylim(-10, 210)
plt.show()
env = OptimumTrajectory(x0, g, execution_time, dt, obstacles,
                        penalty_goal_dist=10000.0, penalty_start_dist=10000.0,
                        penalty_obstacle=1000.0, penalty_length=10.0,
                        hide_acc_from_interface=True, use_covar=True)
opt = CMAESOptimizer(variance=0.1 ** 2, random_state=0,
                     initial_params=beh.get_params())
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs,
                        n_episodes=n_episodes, record_inputs=True)
rewards = controller.learn(["x0", "g"], [x0, g])
controller.episode_with(bs.get_best_behavior(), ["x0", "g"], [x0, g])
X = np.asarray(controller.inputs_[-1])
X_hist = np.asarray(controller.inputs_)

plt.figure(figsize=(8, 5))
ax = plt.subplot(121)
ax.set_title("Optimization progress")
ax.plot(rewards)
ax.set_xlabel("Episode")
import numpy as np

from bolero.wrapper import CppBLLoader
from bolero.controller import Controller
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer

from dmp_behavior import DMPBehavior
from first_feedback import FirstFeedback


if __name__ == "__main__":
    environment_name = "throwing_environment"

    bll = CppBLLoader()
    bll.load_library(environment_name)
    env = bll.acquire_contextual_environment(environment_name)
    env = FirstFeedback(env, random_state=0)
    env.request_context(np.array([1.5, 1.0]))

    beh = DMPBehavior(dt=0.01, execution_time=0.5, n_features=5)
    opt = CMAESOptimizer(variance=200.0 ** 2, active=True, random_state=0)
    bs = BlackBoxSearch(beh, opt)
    ctrl = Controller(environment=env, behavior_search=bs, n_episodes=200,
                      verbose=2)
    # DMP meta-parameters: start (x0), goal (g), and goal velocity (gd)
    meta_params = [np.array([0.0, -0.8, -0.7]),
                   np.array([0.5, 0.5, 0.5]),
                   np.array([0.0, 0.5, 0.5])]
    print(ctrl.learn(["x0", "g", "gd"], meta_params))