def test_record_test_results():
    """Check the test results collected while learning.

    The controller is run with ``n_episodes=100`` and
    ``n_episodes_before_test=10``; afterwards ``test_results_`` must hold
    10 entries and consecutive entries must never decrease.
    """
    optimizer = CMAESOptimizer(initial_params=np.zeros(2))
    controller = Controller(
        environment=ObjectiveFunction(),
        behavior_search=JustOptimizer(optimizer),
        n_episodes_before_test=10,
        n_episodes=100,
    )
    controller.learn()

    test_results = np.array(controller.test_results_)
    assert_equal(test_results.shape[0], 10)

    # Each entry is compared with its successor.
    earlier, later = test_results[:-1], test_results[1:]
    assert_true(np.all(earlier <= later))
def learn(setup_fun, variance):
    """Learn a pendulum behavior with CMA-ES, save the best parameters, plot.

    Parameters
    ----------
    setup_fun : object
        Not used by this function; the setup always comes from
        ``cfg.make_joint_dmp``.
    variance : object
        Passed to ``CMAESOptimizer`` as ``variance``.

    Notes
    -----
    The number of episodes is read from the module-level ``n_episodes``.
    The parameters of the best behavior are written to
    ``best_params_pendulum_joint.npy`` before the final episode is run.
    """
    # Disabled alternatives with the same arguments:
    # cfg.make_approx_cart_dmp and cfg.make_exact_cart_dmp.
    _, behavior, meta_keys, meta_values = cfg.make_joint_dmp(
        cfg.x0, cfg.g, cfg.execution_time, cfg.dt)

    environment = Pendulum(
        x0=cfg.x0,
        g=cfg.g,
        execution_time=cfg.execution_time,
        dt=cfg.dt,
    )
    optimizer = CMAESOptimizer(variance=variance, random_state=0)
    search = BlackBoxSearch(behavior, optimizer)
    controller = Controller(
        environment=environment,
        behavior_search=search,
        n_episodes=n_episodes,
        verbose=2,
    )
    controller.learn(meta_keys, meta_values)

    best_behavior = search.get_best_behavior()
    np.save("best_params_pendulum_joint.npy", best_behavior.get_params())

    # Run the best behavior once more, then show the environment's plot.
    controller.episode_with(best_behavior)
    environment.plot()
    plt.show()
def learn(name, setup_fun, run):
    """Run one CMA-ES learning trial on the via-point environment.

    Parameters
    ----------
    name : object
        Key into ``cfg.variance`` selecting the optimizer variance; returned
        unchanged as the first element of the result.
    setup_fun : callable
        Called as ``setup_fun(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)``
        and expected to return ``(ik, behavior, keys, values)``.
    run : object
        Used as ``random_state`` of the optimizer.

    Returns
    -------
    tuple
        ``(name, rewards, total)``: ``rewards`` is the result of
        ``controller.learn`` and ``total`` is the summed reward of one
        episode with the best behavior.
    """
    setup = setup_fun(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    ik, behavior, meta_keys, meta_values = setup

    environment = ViaPointEnvironment(
        ik, cfg.x0, cfg.via_points, cfg.execution_time, cfg.dt,
        cfg.qlo, cfg.qhi,
        penalty_vel=cfg.penalty_vel,
        penalty_acc=cfg.penalty_acc,
        penalty_via_point=cfg.penalty_via_point,
    )
    optimizer = CMAESOptimizer(variance=cfg.variance[name], random_state=run)
    search = BlackBoxSearch(behavior, optimizer)
    controller = Controller(
        environment=environment,
        behavior_search=search,
        n_episodes=n_episodes,
        verbose=0,
    )

    rewards = controller.learn(meta_keys, meta_values)
    final_reward = controller.episode_with(search.get_best_behavior())
    return name, rewards, final_reward.sum()
def test_record_feedbacks():
    """Check that recorded feedbacks equal the returns of ``learn()``.

    The controller is configured with ``record_feedbacks=True`` and
    ``accumulate_feedbacks=False``.
    """
    optimizer = CMAESOptimizer(initial_params=np.zeros(2))
    controller = Controller(
        environment=ObjectiveFunction(),
        behavior_search=JustOptimizer(optimizer),
        record_feedbacks=True,
        accumulate_feedbacks=False,
    )
    learned_returns = controller.learn()
    assert_array_equal(learned_returns, controller.feedbacks_)
def test_record_inputs():
    """Check the shape of the inputs recorded with ``record_inputs=True``.

    The test expects 10 returns from ``learn()`` and ``inputs_`` to convert
    to an array of shape ``(10, 1, 2)``.
    """
    optimizer = CMAESOptimizer(initial_params=np.zeros(2))
    controller = Controller(
        environment=ObjectiveFunction(),
        behavior_search=JustOptimizer(optimizer),
        record_inputs=True,
    )
    learned_returns = controller.learn()
    assert_equal(len(learned_returns), 10)

    recorded_inputs = np.array(controller.inputs_)
    assert_equal(recorded_inputs.shape, (10, 1, 2))
def test_learn_controller_cmaes_sphere():
    """Check that 200 episodes of CMA-ES get close to the maximum feedback.

    The best return must exceed the environment's maximum feedback minus
    ``1e-5``. Both the optimizer and the environment use ``random_state=0``.
    """
    optimizer = CMAESOptimizer(initial_params=np.zeros(2), random_state=0)
    controller = Controller(
        environment=ObjectiveFunction(random_state=0),
        behavior_search=JustOptimizer(optimizer),
        n_episodes=200,
    )
    learned_returns = controller.learn()

    best_return = learned_returns.max()
    maximum_feedback = controller.environment.get_maximum_feedback()
    assert_greater(best_return - maximum_feedback, -1e-5)
def test_controller_cmaes_sphere_via_config():
    """Check that a controller built from a config dict learns 10 returns.

    The dict names the environment, behavior search and optimizer by their
    dotted ``type`` strings and is passed to ``Controller`` positionally.
    """
    optimizer_config = {
        "type": "bolero.optimizer.CMAESOptimizer",
        "initial_params": np.zeros(2),
    }
    behavior_search_config = {
        "type": "bolero.behavior_search.JustOptimizer",
        "optimizer": optimizer_config,
    }
    config = {
        "Environment": {"type": "bolero.environment.ObjectiveFunction"},
        "BehaviorSearch": behavior_search_config,
    }

    controller = Controller(config)
    learned_returns = controller.learn()
    assert_equal(len(learned_returns), 10)
def test_mc_rl():
    """Check Monte Carlo RL on the ``FrozenLake-v0`` gym environment.

    The test is skipped when initializing the environment raises
    ``ImportError``. Learning is allowed up to 10000 episodes with
    ``finish_after_convergence=True`` and must return fewer than 1000
    returns. One episode with the best behavior must then sum to 1.0.
    """
    environment = OpenAiGym("FrozenLake-v0", render=False, seed=1)
    try:
        environment.init()
    except ImportError:
        raise SkipTest("gym is not installed")

    action_space = environment.get_discrete_action_space()
    search = MonteCarloRL(action_space, random_state=1)
    controller = Controller(
        environment=environment,
        behavior_search=search,
        n_episodes=10000,
        finish_after_convergence=True,
    )

    learned_returns = controller.learn()
    assert_less(len(learned_returns), 1000)

    best_behavior = search.get_best_behavior()
    episode_rewards = controller.episode_with(best_behavior)
    assert_equal(sum(episode_rewards), 1.0)
def learn(name, run, setup_fun, variance):
    """Run one CMA-ES learning trial on the pendulum environment.

    Parameters
    ----------
    name : object
        Label for this trial; returned unchanged as the first result element.
    run : object
        Used as ``random_state`` of the optimizer.
    setup_fun : callable
        Called as ``setup_fun(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)``
        and expected to return ``(ik, behavior, keys, values)``.
    variance : object
        Passed to ``CMAESOptimizer`` as ``variance``.

    Returns
    -------
    tuple
        ``(name, rewards, total)``: ``rewards`` is the result of
        ``controller.learn`` and ``total`` is the summed reward of one
        episode with the best behavior.
    """
    setup = setup_fun(cfg.x0, cfg.g, cfg.execution_time, cfg.dt)
    _, behavior, meta_keys, meta_values = setup

    environment = Pendulum(
        x0=cfg.x0,
        g=cfg.g,
        execution_time=cfg.execution_time,
        dt=cfg.dt,
    )
    optimizer = CMAESOptimizer(variance=variance, random_state=run)
    search = BlackBoxSearch(behavior, optimizer)
    controller = Controller(
        environment=environment,
        behavior_search=search,
        n_episodes=n_episodes,
        verbose=2,
    )

    rewards = controller.learn(meta_keys, meta_values)
    final_reward = controller.episode_with(search.get_best_behavior())
    return name, rewards, final_reward.sum()
penalty_via_point=cfg.penalty_via_point, log_to_stdout=True) if os.path.exists("initial_params.txt"): initial_params = np.loadtxt("initial_params.txt") else: initial_params = None opt = CMAESOptimizer(initial_params=initial_params, variance=cfg.variance["approxik"], random_state=0) bs = BlackBoxSearch(beh, opt) controller = Controller(environment=env, behavior_search=bs, n_episodes=1000, verbose=2) rewards = controller.learn(mp_keys, mp_values) best = bs.get_best_behavior() best_params = best.get_params() np.save("best_params_viapoint_joint.npy", best_params) reward = controller.episode_with(best) print(reward.sum()) plt.plot(rewards) ax = env.plot() ax.view_init(azim=-110, elev=30) ax.set_xticks((-0.3, 0.0, 0.3)) ax.set_yticks((0.0, -0.3, -0.6)) ax.set_zticks((0.3, 0.6, 0.9)) plt.savefig("viapoints.pdf") plt.show()
def test_controller_cmaes_sphere():
    """Check that a controller built with keyword arguments learns 10 returns.

    No episode count is passed to ``Controller``; the test expects
    ``learn()`` to return 10 values.
    """
    optimizer = CMAESOptimizer(initial_params=np.zeros(2))
    controller = Controller(
        environment=ObjectiveFunction(),
        behavior_search=JustOptimizer(optimizer),
    )
    learned_returns = controller.learn()
    assert_equal(len(learned_returns), 10)
to solve the problem and policy search algorithm usually work very well in this domain. """ print(__doc__) import numpy as np import matplotlib.pyplot as plt from bolero.environment import OpenAiGym from bolero.behavior_search import BlackBoxSearch from bolero.optimizer import CMAESOptimizer from bolero.representation import LinearBehavior from bolero.controller import Controller beh = LinearBehavior() env = OpenAiGym("CartPole-v0", render=False, seed=0) opt = CMAESOptimizer(variance=10.0**2, random_state=0) bs = BlackBoxSearch(beh, opt) controller = Controller(environment=env, behavior_search=bs, n_episodes=300) rewards = controller.learn() controller.episode_with(bs.get_best_behavior()) plt.figure() ax = plt.subplot(111) ax.set_title("Optimization progress") ax.plot(rewards) ax.set_xlabel("Episode") ax.set_ylabel("Reward") ax.set_ylim(-10, 210) plt.show()
penalty_goal_dist=10000.0, penalty_start_dist=10000.0, penalty_obstacle=1000.0, penalty_length=10., hide_acc_from_interface=True, use_covar=True) opt = CMAESOptimizer(variance=0.1**2, random_state=0, initial_params=beh.get_params()) bs = BlackBoxSearch(beh, opt) controller = Controller(environment=env, behavior_search=bs, n_episodes=n_episodes, record_inputs=True) rewards = controller.learn(["x0", "g"], [x0, g]) controller.episode_with(bs.get_best_behavior(), ["x0", "g"], [x0, g]) X = np.asarray(controller.inputs_[-1]) X_hist = np.asarray(controller.inputs_) plt.figure(figsize=(8, 5)) ax = plt.subplot(121) ax.set_title("Optimization progress") ax.plot(rewards) ax.set_xlabel("Episode") ax.set_ylabel("Reward") ax = plt.subplot(122, aspect="equal") ax.set_title("Learned trajectory") plot_covariance(ax, X[:, :2], np.array(X[:, 4:]).reshape(-1, 4, 4))
import numpy as np
from bolero.wrapper import CppBLLoader
from bolero.controller import Controller
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from dmp_behavior import DMPBehavior
from first_feedback import FirstFeedback


if __name__ == "__main__":
    # The same name is used to load the library and to acquire the
    # contextual environment from it.
    environment_name = "throwing_environment"
    loader = CppBLLoader()
    loader.load_library(environment_name)
    env = loader.acquire_contextual_environment(environment_name)

    # Wrap the loaded environment and request a fixed context.
    env = FirstFeedback(env, random_state=0)
    context = np.array([1.5, 1.0])
    env.request_context(context)

    behavior = DMPBehavior(dt=0.01, execution_time=0.5, n_features=5)
    optimizer = CMAESOptimizer(
        variance=200.0 ** 2, active=True, random_state=0)
    search = BlackBoxSearch(behavior, optimizer)
    controller = Controller(
        environment=env,
        behavior_search=search,
        n_episodes=200,
        verbose=2,
    )

    # Meta-parameter values, in the order of the keys given to learn().
    meta_params = [
        np.array([0.0, -0.8, -0.7]),
        np.array([0.5, 0.5, 0.5]),
        np.array([0.0, 0.5, 0.5]),
    ]
    learned_returns = controller.learn(["x0", "g", "gd"], meta_params)
    print(learned_returns)
A simple problem with a discrete state and action space is solved with a tabular reinforcement learning algorithm. The plot shows the obtained return for each episode. Successful episodes terminate with the return 1, otherwise the return is 0. The learning process is stopped when the value function converged. """ print(__doc__) import matplotlib.pyplot as plt from bolero.environment import OpenAiGym from bolero.behavior_search import MonteCarloRL from bolero.controller import Controller env = OpenAiGym("FrozenLake-v0", render=False, seed=1) env.init() bs = MonteCarloRL(env.get_discrete_action_space(), random_state=1) ctrl = Controller(environment=env, behavior_search=bs, n_episodes=10000, finish_after_convergence=True) rewards = ctrl.learn() plt.figure() ax = plt.subplot(111) ax.set_title("Learning progress") ax.plot(rewards) ax.set_xlabel("Episode") ax.set_ylabel("Reward") plt.show()