Python Controller.learnの例

プログラミング言語: Python

名前空間/パッケージ名: bolero.controller

クラス/型: Controller

メソッド/関数: learn

hotexamples.comのコード掲載数: 15

Python Controller.learn - 15件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのbolero.controller.Controller.learnの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

Controller(20)

learn(15)

episode_with(11)

よく使われるメソッド

Controller (20)

learn (15)

episode_with (11)

コード例 #1

ファイルを表示

def test_record_test_results():
    """Check that intermediate test results are recorded in order.

    With ``n_episodes=100`` and ``n_episodes_before_test=10`` the test expects
    ten entries in ``test_results_`` and requires every entry to be at least
    as large as the one before it.
    """
    optimizer = CMAESOptimizer(initial_params=np.zeros(2))
    objective = ObjectiveFunction()
    search = JustOptimizer(optimizer)
    controller = Controller(
        environment=objective,
        behavior_search=search,
        n_episodes_before_test=10,
        n_episodes=100,
    )
    controller.learn()

    recorded = np.array(controller.test_results_)
    n_recorded = recorded.shape[0]
    assert_equal(n_recorded, 10)

    # Compare each result with its successor: the sequence must not decrease.
    earlier, later = recorded[:-1], recorded[1:]
    assert_true(np.all(earlier <= later))

コード例 #2

ファイルを表示

def learn(setup_fun, variance):
    """Optimize a joint-space DMP on the pendulum task and show the result.

    Parameters
    ----------
    setup_fun
        Not used by this function; the behavior is always created with
        ``cfg.make_joint_dmp``.
    variance
        Forwarded to ``CMAESOptimizer`` as ``variance``.

    The parameters of the best behavior are saved to
    ``best_params_pendulum_joint.npy``, one more episode is run with that
    behavior, and the environment's plot is shown. ``n_episodes`` is read
    from the enclosing module scope.
    """
    # NOTE: cfg.make_approx_cart_dmp and cfg.make_exact_cart_dmp were
    # previously tried here with the same four arguments.
    _ik, behavior, keys, values = cfg.make_joint_dmp(
        cfg.x0, cfg.g, cfg.execution_time, cfg.dt)

    pendulum = Pendulum(
        x0=cfg.x0, g=cfg.g, execution_time=cfg.execution_time, dt=cfg.dt)

    optimizer = CMAESOptimizer(variance=variance, random_state=0)
    search = BlackBoxSearch(behavior, optimizer)
    ctrl = Controller(
        environment=pendulum,
        behavior_search=search,
        n_episodes=n_episodes,
        verbose=2,
    )
    ctrl.learn(keys, values)

    winner = search.get_best_behavior()
    np.save("best_params_pendulum_joint.npy", winner.get_params())
    # Run the best behavior once more before plotting the environment.
    ctrl.episode_with(winner)

    pendulum.plot()
    plt.show()

コード例 #3

ファイルを表示

def learn(name, setup_fun, run):
    """Run one CMA-ES learning trial on the via-point task.

    ``setup_fun`` is called with ``cfg.x0, cfg.g, cfg.execution_time, cfg.dt``
    and must return ``(ik, beh, mp_keys, mp_values)``. ``name`` selects the
    optimizer variance from ``cfg.variance`` and ``run`` is used as the
    optimizer's ``random_state``. ``n_episodes`` is read from the enclosing
    module scope.

    Returns
    -------
    tuple
        ``(name, rewards, total)`` where ``rewards`` is the value returned by
        ``Controller.learn`` and ``total`` is ``.sum()`` of the result of one
        additional episode with the best behavior found.
    """
    ik, behavior, keys, values = setup_fun(
        cfg.x0, cfg.g, cfg.execution_time, cfg.dt)

    environment = ViaPointEnvironment(
        ik, cfg.x0, cfg.via_points, cfg.execution_time, cfg.dt,
        cfg.qlo, cfg.qhi,
        penalty_vel=cfg.penalty_vel,
        penalty_acc=cfg.penalty_acc,
        penalty_via_point=cfg.penalty_via_point,
    )

    search = BlackBoxSearch(
        behavior,
        CMAESOptimizer(variance=cfg.variance[name], random_state=run),
    )
    ctrl = Controller(
        environment=environment,
        behavior_search=search,
        n_episodes=n_episodes,
        verbose=0,
    )
    learning_rewards = ctrl.learn(keys, values)

    # Evaluate the best behavior found in one extra episode.
    final_reward = ctrl.episode_with(search.get_best_behavior())
    return name, learning_rewards, final_reward.sum()

コード例 #4

ファイルを表示

def test_record_feedbacks():
    """Check that recorded feedbacks equal the values returned by ``learn``.

    The controller is created with ``record_feedbacks=True`` and
    ``accumulate_feedbacks=False``.
    """
    optimizer = CMAESOptimizer(initial_params=np.zeros(2))
    objective = ObjectiveFunction()
    search = JustOptimizer(optimizer)
    controller = Controller(
        environment=objective,
        behavior_search=search,
        record_feedbacks=True,
        accumulate_feedbacks=False,
    )

    learned = controller.learn()

    assert_array_equal(learned, controller.feedbacks_)

コード例 #5

ファイルを表示

ファイル: test_controller.py プロジェクト: xyyeh/bolero

def test_record_inputs():
    """Check the number of returns and the shape of the recorded inputs.

    With ``record_inputs=True`` the test expects ten returns from ``learn``
    and ``inputs_`` to convert to an array of shape ``(10, 1, 2)``.
    """
    optimizer = CMAESOptimizer(initial_params=np.zeros(2))
    objective = ObjectiveFunction()
    search = JustOptimizer(optimizer)
    controller = Controller(
        environment=objective,
        behavior_search=search,
        record_inputs=True,
    )

    learned = controller.learn()
    assert_equal(len(learned), 10)

    recorded_shape = np.array(controller.inputs_).shape
    assert_equal(recorded_shape, (10, 1, 2))

コード例 #6

ファイルを表示

ファイル: test_controller.py プロジェクト: xyyeh/bolero

def test_learn_controller_cmaes_sphere():
    """Check that 200 episodes of CMA-ES come close to the maximum feedback.

    The best return must lie within ``1e-5`` of the value reported by the
    environment's ``get_maximum_feedback()``.
    """
    optimizer = CMAESOptimizer(initial_params=np.zeros(2), random_state=0)
    objective = ObjectiveFunction(random_state=0)
    search = JustOptimizer(optimizer)
    controller = Controller(
        environment=objective,
        behavior_search=search,
        n_episodes=200,
    )

    learned = controller.learn()

    best_return = learned.max()
    optimum = controller.environment.get_maximum_feedback()
    # The gap is compared against a small negative tolerance.
    assert_greater(best_return - optimum, -1e-5)

コード例 #7

ファイルを表示

def test_controller_cmaes_sphere_via_config():
    """Check that a controller built from a config dict returns ten values.

    The configuration names the environment, behavior search and optimizer
    by their dotted ``type`` strings instead of passing instances.
    """
    optimizer_config = {
        "type": "bolero.optimizer.CMAESOptimizer",
        "initial_params": np.zeros(2),
    }
    behavior_search_config = {
        "type": "bolero.behavior_search.JustOptimizer",
        "optimizer": optimizer_config,
    }
    environment_config = {"type": "bolero.environment.ObjectiveFunction"}

    controller = Controller({
        "Environment": environment_config,
        "BehaviorSearch": behavior_search_config,
    })
    learned = controller.learn()

    assert_equal(len(learned), 10)

コード例 #8

ファイルを表示

ファイル: test_monte_carlo_rl.py プロジェクト: ericyao2013/ROBOT-bolero

def test_mc_rl():
    """Check Monte Carlo RL on FrozenLake: early stop and a successful policy.

    The test is skipped when initialising the environment raises
    ``ImportError``. Learning is capped at 10000 episodes with
    ``finish_after_convergence=True`` and is expected to return fewer than
    1000 values; an episode with the best behavior must sum to ``1.0``.
    """
    frozen_lake = OpenAiGym("FrozenLake-v0", render=False, seed=1)
    try:
        frozen_lake.init()
    except ImportError:
        raise SkipTest("gym is not installed")

    action_space = frozen_lake.get_discrete_action_space()
    search = MonteCarloRL(action_space, random_state=1)
    controller = Controller(
        environment=frozen_lake,
        behavior_search=search,
        n_episodes=10000,
        finish_after_convergence=True,
    )

    learned = controller.learn()
    assert_less(len(learned), 1000)

    episode_rewards = controller.episode_with(search.get_best_behavior())
    assert_equal(sum(episode_rewards), 1.0)

コード例 #9

ファイルを表示

ファイル: benchmark_pendulum.py プロジェクト: rock-learning/approxik

def learn(name, run, setup_fun, variance):
    """Run one CMA-ES learning trial on the pendulum task.

    ``setup_fun`` is called with ``cfg.x0, cfg.g, cfg.execution_time, cfg.dt``
    and must return ``(ik, beh, mp_keys, mp_values)``. ``variance`` and
    ``run`` are passed to ``CMAESOptimizer`` as ``variance`` and
    ``random_state``. ``n_episodes`` is read from the enclosing module scope.

    Returns
    -------
    tuple
        ``(name, rewards, total)`` where ``rewards`` is the value returned by
        ``Controller.learn`` and ``total`` is ``.sum()`` of the result of one
        additional episode with the best behavior found.
    """
    _ik, behavior, keys, values = setup_fun(
        cfg.x0, cfg.g, cfg.execution_time, cfg.dt)

    pendulum = Pendulum(x0=cfg.x0,
                        g=cfg.g,
                        execution_time=cfg.execution_time,
                        dt=cfg.dt)

    search = BlackBoxSearch(
        behavior, CMAESOptimizer(variance=variance, random_state=run))
    ctrl = Controller(
        environment=pendulum,
        behavior_search=search,
        n_episodes=n_episodes,
        verbose=2,
    )
    learning_rewards = ctrl.learn(keys, values)

    # Evaluate the best behavior found in one extra episode.
    final_reward = ctrl.episode_with(search.get_best_behavior())
    return name, learning_rewards, final_reward.sum()

コード例 #10

ファイルを表示

                          penalty_via_point=cfg.penalty_via_point,
                          log_to_stdout=True)

# NOTE(review): beh, env, mp_keys, mp_values and cfg are bound earlier in the
# script, outside this excerpt.

# Start the optimizer from the parameters in initial_params.txt when that
# file exists; otherwise pass None as initial_params.
if os.path.exists("initial_params.txt"):
    initial_params = np.loadtxt("initial_params.txt")
else:
    initial_params = None
opt = CMAESOptimizer(initial_params=initial_params,
                     variance=cfg.variance["approxik"],
                     random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env,
                        behavior_search=bs,
                        n_episodes=1000,
                        verbose=2)
rewards = controller.learn(mp_keys, mp_values)

# Save the parameters of the best behavior, then run one more episode with
# it and print the summed result.
best = bs.get_best_behavior()
best_params = best.get_params()
np.save("best_params_viapoint_joint.npy", best_params)
reward = controller.episode_with(best)
print(reward.sum())

# Plot the values returned by learn(), then the environment's own plot.
plt.plot(rewards)
ax = env.plot()
# view_init / set_zticks are called below, so env.plot() presumably returns
# a 3D axes -- TODO confirm.
ax.view_init(azim=-110, elev=30)
ax.set_xticks((-0.3, 0.0, 0.3))
ax.set_yticks((0.0, -0.3, -0.6))
ax.set_zticks((0.3, 0.6, 0.9))
# Write the figure to disk before showing it.
plt.savefig("viapoints.pdf")
plt.show()

コード例 #11

ファイルを表示

ファイル: test_controller.py プロジェクト: xyyeh/bolero

def test_controller_cmaes_sphere():
    """Check that a controller with default settings returns ten values."""
    optimizer = CMAESOptimizer(initial_params=np.zeros(2))
    objective = ObjectiveFunction()
    search = JustOptimizer(optimizer)
    controller = Controller(environment=objective, behavior_search=search)

    learned = controller.learn()

    assert_equal(len(learned), 10)

コード例 #12

ファイルを表示

ファイル: plot_cart_pole.py プロジェクト: ericyao2013/ROBOT-bolero

to solve the problem and policy search algorithm usually work very well in
this domain.
"""
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
from bolero.environment import OpenAiGym
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from bolero.representation import LinearBehavior
from bolero.controller import Controller

# Set up the learning problem: a linear behavior optimized with CMA-ES on
# the "CartPole-v0" gym environment, with fixed seeds.
beh = LinearBehavior()
env = OpenAiGym("CartPole-v0", render=False, seed=0)
opt = CMAESOptimizer(variance=10.0**2, random_state=0)
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env, behavior_search=bs, n_episodes=300)

# Learn, then run one more episode with the best behavior found.
rewards = controller.learn()
controller.episode_with(bs.get_best_behavior())

# Plot the values returned by learn() on a fixed y-range of -10..210.
plt.figure()
ax = plt.subplot(111)
ax.set_title("Optimization progress")
ax.plot(rewards)
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
ax.set_ylim(-10, 210)
plt.show()

コード例 #13

ファイルを表示

ファイル: plot_obstacle_avoidance_promp.py プロジェクト: xyyeh/bolero

                        penalty_goal_dist=10000.0,
                        penalty_start_dist=10000.0,
                        penalty_obstacle=1000.0,
                        penalty_length=10.,
                        hide_acc_from_interface=True,
                        use_covar=True)
# NOTE(review): beh, env, x0, g and n_episodes are bound earlier in the
# script, outside this excerpt.

# CMA-ES starts from the behavior's current parameters.
opt = CMAESOptimizer(variance=0.1**2,
                     random_state=0,
                     initial_params=beh.get_params())
bs = BlackBoxSearch(beh, opt)
controller = Controller(environment=env,
                        behavior_search=bs,
                        n_episodes=n_episodes,
                        record_inputs=True)

# Learn with the meta-parameters "x0" and "g", then run one more episode
# with the best behavior using the same meta-parameters.
rewards = controller.learn(["x0", "g"], [x0, g])
controller.episode_with(bs.get_best_behavior(), ["x0", "g"], [x0, g])
# X is the last recorded entry of inputs_ (recorded because of
# record_inputs=True); X_hist holds all of them and is not used within this
# excerpt.
X = np.asarray(controller.inputs_[-1])
X_hist = np.asarray(controller.inputs_)

# Left panel: the values returned by learn().
plt.figure(figsize=(8, 5))
ax = plt.subplot(121)
ax.set_title("Optimization progress")
ax.plot(rewards)
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")

# Right panel: the first two columns of X together with columns 4 onward
# reshaped into one 4x4 matrix per row (presumably the covariances, given
# plot_covariance -- TODO confirm).
ax = plt.subplot(122, aspect="equal")
ax.set_title("Learned trajectory")
plot_covariance(ax, X[:, :2], np.array(X[:, 4:]).reshape(-1, 4, 4))

コード例 #14

ファイルを表示

import numpy as np
from bolero.wrapper import CppBLLoader
from bolero.controller import Controller
from bolero.behavior_search import BlackBoxSearch
from bolero.optimizer import CMAESOptimizer
from dmp_behavior import DMPBehavior
from first_feedback import FirstFeedback


if __name__ == "__main__":
    # Load the "throwing_environment" library, acquire its contextual
    # environment and wrap it in FirstFeedback before requesting a context.
    library = "throwing_environment"
    loader = CppBLLoader()
    loader.load_library(library)
    contextual_env = loader.acquire_contextual_environment(library)
    wrapped_env = FirstFeedback(contextual_env, random_state=0)
    wrapped_env.request_context(np.array([1.5, 1.0]))

    # DMP behavior optimized with CMA-ES through a black-box search.
    behavior = DMPBehavior(dt=0.01, execution_time=0.5, n_features=5)
    optimizer = CMAESOptimizer(
        variance=200.0 ** 2, active=True, random_state=0)
    search = BlackBoxSearch(behavior, optimizer)
    controller = Controller(
        environment=wrapped_env,
        behavior_search=search,
        n_episodes=200,
        verbose=2,
    )

    # Meta-parameter names and their values, in matching order.
    meta_keys = ["x0", "g", "gd"]
    meta_values = [
        np.array([0.0, -0.8, -0.7]),
        np.array([0.5, 0.5, 0.5]),
        np.array([0.0, 0.5, 0.5]),
    ]
    learned = controller.learn(meta_keys, meta_values)
    print(learned)

コード例 #15

ファイルを表示

A simple problem with a discrete state and action space is solved with
a tabular reinforcement learning algorithm. The plot shows the obtained return
for each episode. Successful episodes terminate with the return 1, otherwise
the return is 0. The learning process is stopped when the value function
converged.
"""
print(__doc__)

import matplotlib.pyplot as plt
from bolero.environment import OpenAiGym
from bolero.behavior_search import MonteCarloRL
from bolero.controller import Controller

# Create and initialise the "FrozenLake-v0" gym environment with a fixed seed.
env = OpenAiGym("FrozenLake-v0", render=False, seed=1)
env.init()
# Monte Carlo RL is set up on the environment's discrete action space.
bs = MonteCarloRL(env.get_discrete_action_space(), random_state=1)
# At most 10000 episodes are requested; finish_after_convergence=True matches
# the module docstring's statement that learning stops once the value
# function has converged.
ctrl = Controller(environment=env,
                  behavior_search=bs,
                  n_episodes=10000,
                  finish_after_convergence=True)
rewards = ctrl.learn()

# Plot the values returned by learn() (per the module docstring, the return
# obtained in each episode).
plt.figure()
ax = plt.subplot(111)
ax.set_title("Learning progress")
ax.plot(rewards)
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
plt.show()