Python LQR 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: mushroom_rl.environments.lqr

클래스/타입: LQR

hotexamples.com에서의 예제들: 2

Python LQR - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python mushroom_rl.environments.lqr.LQR의 실제 사용 예제 중 평점이 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

generate(2)

LQR(1)

reset(1)

step(1)

자주 사용되는 메소드들

generate (2)

LQR (1)

reset (1)

step (1)

예제 #1

파일 보기

def learn(alg, alg_params):
    """Run a short training session of ``alg`` on a 1-dimensional LQR task.

    Two linear regressors are created with identical shapes and parameters:
    one is used as the first argument of ``StateStdGaussianPolicy`` and the
    other (with every weight set to 2) as its second argument. The agent is
    then trained through ``Core`` for 10 episodes, fitting every 5 episodes.

    Args:
        alg: callable invoked as ``alg(mdp.info, policy, **alg_params)`` to
            build the agent.
        alg_params: mapping of keyword arguments forwarded to ``alg``.

    Returns:
        The ``StateStdGaussianPolicy`` instance that was passed to the agent.
    """
    env = LQR.generate(dimensions=1)

    # The RNGs are seeded only after the environment has been generated;
    # this ordering is kept on purpose so results stay reproducible.
    seed = 1
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    obs_shape = env.info.observation_space.shape
    act_shape = env.info.action_space.shape

    # Both regressors receive the very same parameter dictionary object.
    regressor_kwargs = dict(
        input_shape=obs_shape,
        output_shape=act_shape,
        params=dict(input_dim=obs_shape),
    )

    mean_regressor = Regressor(LinearApproximator, **regressor_kwargs)

    std_regressor = Regressor(LinearApproximator, **regressor_kwargs)
    std_regressor.set_weights(np.full(std_regressor.weights_size, 2.0))

    gaussian_policy = StateStdGaussianPolicy(mean_regressor, std_regressor)

    learner = alg(env.info, gaussian_policy, **alg_params)
    Core(learner, env).learn(n_episodes=10, n_episodes_per_fit=5)

    return gaussian_policy

예제 #2

파일 보기

def test_lqr():
    """Compare LQR transitions against hard-coded reference values.

    Part one seeds NumPy, generates a 2-dimensional LQR, applies ten random
    actions and checks the last returned state. Part two builds an LQR from
    explicit matrices and checks state, reward and absorbing flag for four
    fixed actions; only the last of them is expected to be absorbing.
    """
    # --- Part 1: generated environment driven by seeded random actions ---
    np.random.seed(1)
    env = LQR.generate(2)
    env.reset()
    n_actions = env.info.action_space.shape[0]
    for _ in range(10):
        next_state, _reward, _absorbing, _info = env.step(
            np.random.rand(n_actions))

    assert np.allclose(next_state, np.array([12.35564605, 14.98996889]))

    # --- Part 2: environment built from explicit matrices ---
    a_matrix = np.eye(3)
    b_matrix = np.array([[2 / 3, 0], [1 / 3, 1 / 3], [0, 2 / 3]])
    q_matrix = np.array([[0.1, 0., 0.], [0., 0.9, 0.], [0., 0., 0.1]])
    r_matrix = np.array([[0.1, 0.], [0., 0.9]])
    env = LQR(a_matrix, b_matrix, q_matrix, r_matrix,
              max_pos=11.0, max_action=0.5, episodic=True)
    env.reset()

    # Each row: (action, expected next state, expected reward, absorbing?)
    # NOTE(review): the last row presumably becomes absorbing because the
    # state leaves the max_pos bound - confirm against the LQR implementation.
    reference_steps = (
        ([1.0, 0.3], [10.23333333, 10.16666667, 10.1],
         -107.917, False),
        ([0.4, -0.1], [10.5, 10.26666667, 10.03333333],
         -113.72311111111117, False),
        ([0.5, 0.6], [10.83333333, 10.6, 10.36666667],
         -116.20577777777778, False),
        ([0.3, -0.7], [11.03333333, 10.53333333, 10.03333333],
         -1210.0, True),
    )

    for action, state_ref, reward_ref, absorbing_ref in reference_steps:
        next_state, reward, absorbing, _info = env.step(np.array(action))
        assert np.allclose(next_state, np.array(state_ref))
        assert np.allclose(reward, reward_ref)
        assert bool(absorbing) == absorbing_ref