def test_multi_sync_env_runner_log_episode_metrics(mock_data_logger, mock_task,
                                                   mock_agent):
    """Every per-episode metric is passed to ``data_logger.log_value``.

    For each episode the test expects one call per tag (epsilon, average
    score, score, iterations) with the episode number as the last argument.
    """
    # Assign
    episodes = [1, 2]
    epsilons = [0.2, 0.1]
    mean_scores = [0.5, 1]
    scores = [1.5, 5]
    iterations = [10, 10]
    runner = MultiSyncEnvRunner(mock_task,
                                mock_agent,
                                data_logger=mock_data_logger)

    # Act
    runner.log_episode_metrics(episodes=episodes,
                               epsilons=epsilons,
                               mean_scores=mean_scores,
                               iterations=iterations,
                               scores=scores)

    # Assert
    assert_logged = mock_data_logger.log_value.assert_any_call
    per_episode = zip(episodes, epsilons, mean_scores, scores, iterations)
    for episode, epsilon, mean_score, score, n_iterations in per_episode:
        assert_logged("episode/epsilon", epsilon, episode)
        assert_logged("episode/avg_score", mean_score, episode)
        assert_logged("episode/score", score, episode)
        assert_logged("episode/iterations", n_iterations, episode)
def test_multi_sync_env_runner_log_data_interaction_no_data_logger(
        mock_task, mock_agent):
    """Without a data logger the agent is never asked to log its metrics."""
    # Assign
    runner_without_logger = MultiSyncEnvRunner(mock_task, mock_agent)

    # Act
    runner_without_logger.log_data_interaction()

    # Assert
    log_metrics = mock_agent.log_metrics
    log_metrics.assert_not_called()
def test_multi_sync_env_runner_load_state_no_file(mock_task, mock_agent):
    """``load_state`` logs one warning and skips the agent's own loading."""
    # Assign
    runner = MultiSyncEnvRunner(mock_task, mock_agent, max_iterations=10)
    runner.logger = mock.MagicMock()

    # Act
    runner.load_state(file_prefix='saved_state')

    # Assert
    expected_warning = "Couldn't load state. Forcing restart."
    runner.logger.warning.assert_called_once_with(expected_warning)
    mock_agent.load_state.assert_not_called()
def test_multi_sync_env_runner_run_single_step_single_task():
    """A one-episode, one-iteration run on a single task yields one score."""
    # Assign
    runner = MultiSyncEnvRunner([test_task], test_agent)
    run_options = dict(max_episodes=1, max_iterations=1, force_new=True)

    # Act
    scores = runner.run(**run_options)

    # Assert
    # No chance that it'll terminate episode in 1 iteration
    n_scores = len(scores)
    assert n_scores == 1
def test_multi_sync_env_runner_log_episode_metrics_values_missing(
        mock_data_logger, mock_task, mock_agent):
    """Passing only ``episodes`` (no metric values) logs nothing."""
    # Assign
    runner = MultiSyncEnvRunner(mock_task,
                                mock_agent,
                                data_logger=mock_data_logger)

    # Act
    runner.log_episode_metrics(episodes=[1, 2])

    # Assert
    log_value = mock_data_logger.log_value
    log_value.assert_not_called()
def test_multi_sync_env_runner_info_no_data_logger(mock_task, mock_agent):
    """``info`` emits exactly one ``logger.info`` call without a data logger."""
    # Assign
    mock_tasks: List[TaskType] = [mock_task, mock_task]
    runner = MultiSyncEnvRunner(mock_tasks, mock_agent)
    runner.logger = mock.MagicMock()
    info_kwargs = {
        "episodes": [2],
        "iterations": [10],
        "scores": [1],
        "mean_scores": [2],
        "epsilons": [1],
    }

    # Act
    runner.info(**info_kwargs)

    # Assert
    runner.logger.info.assert_called_once()
def test_multi_sync_env_runner_run_single_step_multiple_task():
    """A one-iteration run over two tasks yields one score per task."""
    # Assign
    tasks: List[TaskType] = [test_task, test_task]
    n_workers = len(tasks)
    agent = PPOAgent(test_task.state_size,
                     test_task.action_size,
                     num_workers=n_workers)
    runner = MultiSyncEnvRunner(tasks, agent)

    # Act
    scores = runner.run(max_episodes=1, max_iterations=1, force_new=True)

    # Assert
    # After 1 iteration both "finished" at the same time
    n_scores = len(scores)
    assert n_scores == 2
def test_multi_sync_env_runner_log_data_interaction_log_after_episode(
        mock_data_logger):
    """One episode logs agent metrics once and four values to the data logger.

    ``log_metrics`` is patched on the module-level ``test_agent`` only while
    the runner is executing. The original attribute is restored afterwards,
    so the mock does not leak into other tests that use the same agent.
    """
    # Assign
    with mock.patch.object(test_agent, "log_metrics") as mock_log_metrics:
        env_runner = MultiSyncEnvRunner([test_task],
                                        test_agent,
                                        data_logger=mock_data_logger)

        # Act
        env_runner.run(max_episodes=1, max_iterations=10)

    # Assert
    mock_log_metrics.assert_called_once_with(mock_data_logger,
                                             10,
                                             full_log=False)
    assert mock_data_logger.log_values_dict.call_count == 0
    assert mock_data_logger.log_value.call_count == 4
def test_multi_sync_env_runner_run_multiple_step_multiple_task():
    """A three-episode run over two tasks returns three or four scores."""
    # Assign
    tasks: List[TaskType] = [test_task, test_task]
    n_workers = len(tasks)
    agent = PPOAgent(test_task.state_size,
                     test_task.action_size,
                     num_workers=n_workers)
    runner = MultiSyncEnvRunner(tasks, agent)

    # Act
    scores = runner.run(max_episodes=3, max_iterations=100, force_new=True)

    # Assert
    # On rare occasions two tasks can complete twice at the same time.
    n_scores = len(scores)
    assert n_scores == 3 or n_scores == 4
# Example no. 10
# 0
def test_multi_sync_env_runner_init_str_check(mock_task, mock_agent):
    """``str(runner)`` shows the list of task names and the agent name."""
    # Assign & Act
    mock_agent.name = "Agent"
    mock_task.name = "Task"
    runner = MultiSyncEnvRunner([mock_task], mock_agent)
    rendered = str(runner)

    # Assert
    assert rendered == "MultiSyncEnvRunner<['Task'], Agent>"
# Example no. 11
# 0
def test_multi_sync_env_runner_load_state(mock_task, mock_agent, mock_os):
    """``load_state`` opens the ``e999`` file and restores the runner state.

    The directory listing holds two matching state files and one unrelated
    file. The test expects the ``saved_state_e999.json`` file to be opened,
    and the episode, epsilon and score from the file contents to end up on
    the runner.
    """
    # Assign
    runner = MultiSyncEnvRunner(mock_task, mock_agent, max_iterations=10)
    listed_files = ['saved_state_e10.json', 'saved_state_e999.json', 'other.file']
    mock_os.listdir.return_value = listed_files
    mocked_state = '{"episode": 10, "epsilon": 0.2, "score": 0.3, "average_score": -0.1}'
    fake_open = mock.mock_open(read_data=mocked_state)

    # Act
    with mock.patch('builtins.open', fake_open) as mock_file:
        runner.load_state(file_prefix='saved_state')
        expected_path = f'{runner.state_dir}/saved_state_e999.json'
        mock_file.assert_called_once_with(expected_path, 'r')

    # Assert
    mock_agent.load_state.assert_called_once()
    assert runner.episode == 10
    assert runner.epsilon == 0.2
    restored_scores = runner.all_scores
    assert len(restored_scores) == 1
    assert restored_scores[0] == 0.3
# Example no. 12
# 0
def test_multi_sync_env_runner_reset(mock_task, mock_agent):
    """``reset`` zeroes the episode counter and empties the history buffers."""
    # Assign
    n_entries = 10
    runner = MultiSyncEnvRunner([mock_task], mock_agent, window_len=10)
    runner.episode = 10
    runner.all_iterations.extend(
        random.randint(1, 100) for _ in range(n_entries))
    runner.all_scores.extend(random.random() for _ in range(n_entries))
    runner.scores_window.extend(random.random() for _ in range(n_entries))

    # Act
    runner.reset()

    # Assert
    assert runner.episode == 0
    assert len(runner.all_iterations) == 0
    assert len(runner.all_scores) == 0
    assert len(runner.scores_window) == 0
# Example no. 13
# 0
def test_multi_sync_env_runner_info_with_data_logger(mock_task, mock_agent):
    """With a data logger, ``info`` logs text, four values and agent metrics."""
    # Assign
    data_logger = mock.MagicMock()
    runner = MultiSyncEnvRunner(mock_task, mock_agent, data_logger=data_logger)
    runner.logger = mock.MagicMock()
    info_kwargs = {
        "episodes": [2],
        "iterations": [10],
        "scores": [1],
        "mean_scores": [2],
        "epsilons": [1],
    }

    # Act
    runner.info(**info_kwargs)

    # Assert
    runner.logger.info.assert_called_once()
    logged_values = data_logger.log_value.call_count
    assert logged_values == 4
    mock_agent.log_metrics.assert_called_once_with(data_logger,
                                                   mock.ANY,
                                                   full_log=False)
# Example no. 14
# 0
def test_multi_sync_env_runner_save_state(mock_json, mock_path):
    """``save_state`` saves the agent once and dumps the run counters.

    ``save_state`` is patched on the module-level ``test_agent`` only for the
    duration of the run and the save call. The original attribute is restored
    afterwards, so the mock does not leak into other tests that use the same
    agent.
    """
    # Assign
    with mock.patch.object(test_agent, "save_state") as mock_save_state:
        env_runner = MultiSyncEnvRunner([test_task], test_agent)

        # Act
        env_runner.run(max_episodes=10, max_iterations=10)
        # Patch ``open`` so saving does not touch the real filesystem.
        with mock.patch('builtins.open'):
            env_runner.save_state('saved_state.state')

    # Assert
    mock_save_state.assert_called_once()
    # The first positional argument of ``json.dump`` is the state dict.
    state = mock_json.dump.call_args[0][0]
    assert state['episode'] == 10
    assert state['tot_iterations'] == 10 * 10
    "using_kl_div": True,
    # "value_loss_weight": 2,
    "entropy_weight": 0.01,
    "gamma": 0.999,
    'lambda_gae': 0.98,
    "critic_lr": 3e-4,
    "actor_lr": 3e-4,
    "action_scale": 1,
    "action_min": -20,
    "action_max": 20,
}
# Build one GymTask per worker. `env_name`, `num_workers`, `kwargs`, `Agent`,
# `processes` and `data_logger` are defined outside the visible part of the file.
tasks: List[TaskType] = [GymTask(env_name) for _ in range(num_workers)]
# The agent's state and action sizes are taken from the first task.
agent = Agent(tasks[0].state_size,
              tasks[0].action_size,
              hidden_layers=(100, 64, 64),
              **kwargs)
env_runner = MultiSyncEnvRunner(tasks,
                                agent,
                                processes=processes,
                                data_logger=data_logger)
# NOTE(review): presumably stops at reward_goal or after max_episodes, and
# force_new=True skips any previously saved state -- confirm against
# MultiSyncEnvRunner.run.
scores = env_runner.run(reward_goal=80, max_episodes=5000, force_new=True)

# Plot the returned scores against their index, save the figure as
# "<env_name>.png" and then display it.
fig = plt.figure()
# `ax` is not referenced again below; plotting goes through the pyplot API.
ax = fig.add_subplot(111)
plt.plot(range(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()