# Shared setup for the MultiSyncEnvRunner tests. A minimal sketch: this extract
# starts mid-file, so the module paths, patch targets, fixtures, and the test
# environment below are assumptions, not confirmed by the original source.
from typing import List
from unittest import mock

import pytest

from ai_traineree.agents.ppo import PPOAgent
from ai_traineree.env_runner import MultiSyncEnvRunner
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

test_task = GymTask('Pendulum-v0')  # assumed continuous-action env
test_agent = PPOAgent(test_task.state_size, test_task.action_size)


@pytest.fixture
def mock_data_logger():
    # A bare MagicMock stands in for a DataLogger; the tests only assert call counts.
    return mock.MagicMock()


# Patch targets assumed: save_state() is expected to serialize via json and Path
# in the runner's module.
@mock.patch("ai_traineree.env_runner.Path")
@mock.patch("ai_traineree.env_runner.json")
def test_multi_sync_env_runner_save_state(mock_json, mock_path):
    # Assign
    test_agent.save_state = mock.MagicMock()
    env_runner = MultiSyncEnvRunner([test_task], test_agent)

    # Act
    env_runner.run(max_episodes=10, max_iterations=10)
    with mock.patch('builtins.open'):
        env_runner.save_state('saved_state.state')

    # Assert
    test_agent.save_state.assert_called_once()
    state = mock_json.dump.call_args[0][0]
    assert state['episode'] == 10
    assert state['tot_iterations'] == 10 * 10


def test_multi_sync_env_runner_log_data_interaction_log_after_episode(
        mock_data_logger):
    # Assign
    test_agent.log_metrics = mock.MagicMock()
    env_runner = MultiSyncEnvRunner([test_task],
                                    test_agent,
                                    data_logger=mock_data_logger)

    # Act
    env_runner.run(max_episodes=1, max_iterations=10)

    # Assert
    test_agent.log_metrics.assert_called_once_with(mock_data_logger,
                                                   10,
                                                   full_log=False)
    assert mock_data_logger.log_values_dict.call_count == 0
    assert mock_data_logger.log_value.call_count == 4


def test_multi_sync_env_runner_run_single_step_single_task():
    # Assign
    multi_sync_env_runner = MultiSyncEnvRunner([test_task], test_agent)

    # Act
    scores = multi_sync_env_runner.run(max_episodes=1,
                                       max_iterations=1,
                                       force_new=True)

    # Assert
    # A single iteration can't terminate the episode, so exactly one score is returned.
    assert len(scores) == 1


def test_multi_sync_env_runner_run_single_step_multiple_task():
    # Assign
    tasks: List[TaskType] = [test_task, test_task]
    agent = PPOAgent(test_task.state_size,
                     test_task.action_size,
                     num_workers=len(tasks))
    multi_sync_env_runner = MultiSyncEnvRunner(tasks, agent)

    # Act
    scores = multi_sync_env_runner.run(max_episodes=1,
                                       max_iterations=1,
                                       force_new=True)

    # Assert
    # After one iteration both tasks "finish" at the same time, so each contributes a score.
    assert len(scores) == 2


def test_multi_sync_env_runner_run_multiple_step_multiple_task():
    # Assign
    tasks: List[TaskType] = [test_task, test_task]
    agent = PPOAgent(test_task.state_size,
                     test_task.action_size,
                     num_workers=len(tasks))
    multi_sync_env_runner = MultiSyncEnvRunner(tasks, agent)

    # Act
    scores = multi_sync_env_runner.run(max_episodes=3,
                                       max_iterations=100,
                                       force_new=True)

    # Assert
    # Usually 3 scores, but on rare occasions both tasks complete their final
    # episode at the same time, yielding one extra score.
    assert len(scores) in (3, 4)
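

# --- Example: PPO trained on parallel Gym tasks with MultiSyncEnvRunner ---
# A minimal, self-contained setup sketch. The imports, environment name,
# logger, and worker counts below are assumptions added for illustration;
# the hyperparameters and the runner calls that follow come from the source.
from typing import List

import matplotlib.pyplot as plt

from ai_traineree.agents.ppo import PPOAgent as Agent
from ai_traineree.env_runner import MultiSyncEnvRunner
from ai_traineree.loggers import TensorboardLogger  # assumed logger implementation
from ai_traineree.tasks import GymTask
from ai_traineree.types import TaskType

env_name = 'LunarLanderContinuous-v2'  # assumed continuous-action env
num_workers = 4  # assumed; one task per worker
processes = 4  # assumed; number of OS processes for the runner
data_logger = TensorboardLogger()

kwargs = {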
    "using_kl_div": True,
    # "value_loss_weight": 2,
    "entropy_weight": 0.01,
    "gamma": 0.999,
    "lambda_gae": 0.98,
    "critic_lr": 3e-4,
    "actor_lr": 3e-4,
    "action_scale": 1,
    "action_min": -20,
    "action_max": 20,
}
tasks: List[TaskType] = [GymTask(env_name) for _ in range(num_workers)]
agent = Agent(tasks[0].state_size,
              tasks[0].action_size,
              hidden_layers=(100, 64, 64),
              **kwargs)
env_runner = MultiSyncEnvRunner(tasks,
                                agent,
                                processes=processes,
                                data_logger=data_logger)
scores = env_runner.run(reward_goal=80, max_episodes=5000, force_new=True)

# Plot the per-episode scores and save the figure.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(range(len(scores)), scores)
ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
fig.savefig(f'{env_name}.png', dpi=120)
plt.show()
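
# Optional: persist the runner state so a later run can resume training.
# save_state is the same API exercised in the tests above; the file name
# here is illustrative.
env_runner.save_state(f'{env_name}.state')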