Python RLControlledRandomWalk._interpret_rl_actionの例

プログラミング言語: Python

名前空間/パッケージ名: snc.environments.rl_environment_wrapper

メソッド/関数: _interpret_rl_action

hotexamples.comのコード掲載数: 2

Python RLControlledRandomWalk._interpret_rl_action - 2件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのsnc.environments.rl_environment_wrapper.RLControlledRandomWalk._interpret_rl_actionの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

RLControlledRandomWalk(17)

step(4)

_find_coupled_resource_sets(3)

_interpret_rl_action(2)

preprocess_action(2)

コード例 #1

ファイルを表示

def test_complex_rl_action_interpretation():
    """
    Check how the RL environment interprets actions when a resource owns several activities.

    The network follows the ksrs_network_model example; the set up mirrors the code found in
    snc/stochastic_network_control/environments/examples.py. The constituency matrix groups
    activities 1 and 4 together and activities 2 and 3 together.
    """
    n_buffers = 4
    rng_seed = 72

    # Demand rates (buffers 1 and 3) and the rates filling the buffer processing matrix.
    demand_1, demand_3 = 2, 2
    rate_1, rate_2, rate_3, rate_4 = 10, 3, 10, 3

    demand_vector = np.array([demand_1, 0, demand_3, 0])[:, np.newaxis]
    processing_matrix = np.array([
        [-rate_1, 0, 0, 0],
        [rate_1, -rate_2, 0, 0],
        [0, 0, -rate_3, 0],
        [0, 0, rate_3, -rate_4],
    ])
    resource_to_activity = np.array([
        [1, 0, 0, 1],
        [0, 1, 1, 0],
    ])

    unit_costs = np.ones((n_buffers, 1))
    unbounded_capacity = np.full((n_buffers, 1), np.inf)
    start_state = (0, 0, 0, 0)
    conserve_jobs = True

    # Assemble the environment from its job generator and state initialiser.
    generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_vector, processing_matrix, job_gen_seed=rng_seed)
    initialiser = stinit.DeterministicCRWStateInitialiser(start_state)
    env = RLControlledRandomWalk(unit_costs, unbounded_capacity, resource_to_activity, generator,
                                 initialiser, conserve_jobs)

    # Layout of the binary RL action vector (one slot per option, grouped by resource set):
    #   index 0 -> first group idles
    #   index 1 -> first activity on (first action of first group)
    #   index 2 -> fourth activity on (second action of first group)
    #   index 3 -> second group idles
    #   index 4 -> second activity on (first action of second group)
    #   index 5 -> third activity on (second action of second group)
    n_rl_slots = 6

    all_idle = np.zeros(n_rl_slots)

    fourth_only = np.zeros(n_rl_slots)
    fourth_only[2] = 1

    fourth_and_second = np.zeros(n_rl_slots)
    fourth_and_second[[2, 4]] = 1

    # Interpret the inputs in increasing order of complexity before checking any of them.
    interpreted = [
        env._interpret_rl_action(rl_vector)
        for rl_vector in (all_idle, fourth_only, fourth_and_second)
    ]
    expected = (
        np.zeros(4),
        np.array([0, 0, 0, 1]),
        np.array([0, 1, 0, 1]),
    )

    # The null action, a single-activity action and an action combining activities across
    # resource sets must each map onto the matching activity vector.
    for obtained, wanted in zip(interpreted, expected):
        assert np.all(obtained == wanted)

コード例 #2

ファイルを表示

def test_simple_rl_action_interpretation():
    """
    Check the mechanism that reads in actions produced by an RL model.

    Uses the 4 resource, 4 activity tandem model with no additional activity constraints.
    """
    n_buffers = 4
    rng_seed = 72
    first_buffer_demand = 4

    # Only the first buffer receives demand.
    demand_vector = np.array([first_buffer_demand, 0, 0, 0])[:, np.newaxis]
    processing_matrix = np.array([[-1, 0, 0, 0],
                                  [1, -1, 0, 0],
                                  [0, 1, -1, 0],
                                  [0, 0, 1, -1]])
    # Identity constituency matrix: one row and one column per resource/activity pair.
    resource_to_activity = np.eye(4)

    unit_costs = np.ones((n_buffers, 1))
    unbounded_capacity = np.full((n_buffers, 1), np.inf)
    start_state = np.zeros((n_buffers, 1))
    conserve_jobs = False

    # Assemble the environment from its job generator and state initialiser.
    generator = ScaledBernoulliServicesPoissonArrivalsGenerator(
        demand_vector, processing_matrix, job_gen_seed=rng_seed)
    initialiser = stinit.DeterministicCRWStateInitialiser(start_state)
    env = RLControlledRandomWalk(unit_costs, unbounded_capacity, resource_to_activity, generator,
                                 initialiser, conserve_jobs,
                                 index_phys_resources=(0, 1, 2, 3))

    # RL action vectors of length 8, in increasing order of complexity.
    n_rl_slots = 8

    nothing_on = np.zeros(n_rl_slots)

    slot_one_on = np.zeros(n_rl_slots)
    slot_one_on[1] = 1

    odd_slots_on = np.zeros(n_rl_slots)
    odd_slots_on[1::2] = 1  # sets indices 1, 3, 5 and 7

    interpreted = [
        env._interpret_rl_action(rl_vector)
        for rl_vector in (nothing_on, slot_one_on, odd_slots_on)
    ]
    expected = (
        np.zeros(4),
        np.array([1, 0, 0, 0]),
        np.ones(4),
    )

    # Each interpreted action must equal the expected activity vector.
    for obtained, wanted in zip(interpreted, expected):
        assert np.all(obtained == wanted)