def test_env_runner_log_data_interaction_debug_log(mock_data_logger, mock_task,
                                                   mock_agent):
    # Assign
    mock_task.step.return_value = ([1, 0.1], -1, False, {})
    mock_agent.act.return_value = 1
    env_runner = EnvRunner(mock_task,
                           mock_agent,
                           data_logger=mock_data_logger,
                           debug_log=True)

    # Act
    env_runner.interact_episode(eps=0.1,
                                max_iterations=10,
                                log_interaction_freq=None)
    env_runner.log_data_interaction()

    # Assert
    mock_agent.log_metrics.assert_called_once_with(mock_data_logger,
                                                   10,
                                                   full_log=False)
    assert mock_data_logger.log_values_dict.call_count == 20  # 10 iterations x 2 (states and actions)
    assert mock_data_logger.log_value.call_count == 20  # 10 iterations x 2 (rewards and dones)
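
The test relies on three pytest fixtures that are not shown above. A minimal sketch of what they could look like, assuming plain MagicMock-based fixtures (the names match the test parameters; the real fixtures may configure more behaviour):

import pytest
from unittest.mock import MagicMock


@pytest.fixture
def mock_data_logger():
    # Bare mock; the test only checks how often its log_* methods are called.
    return MagicMock()


@pytest.fixture
def mock_task():
    # Mock task; individual tests set step/reset return values as needed.
    return MagicMock()


@pytest.fixture
def mock_agent():
    # Mock agent; act() and log_metrics() are configured and asserted in the tests.
    return MagicMock()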
Example #2
# Assumed imports for this example (GymTask, TaskType, the PPO Agent and EnvRunner
# come from the RL library this snippet is taken from; exact module paths omitted).
import matplotlib.pyplot as plt
import numpy as np

env_name = 'Pendulum-v0'  # assumed for illustration; any Gym environment id works here

task: TaskType = GymTask(env_name)
config = {
    'rollout_length': 30,         # environment steps collected before each update
    'batch_size': 30,             # minibatch size used during the update
    'number_updates': 1,          # optimisation passes per rollout
    'ppo_ratio_clip': 0.2,        # PPO surrogate clipping epsilon
    'value_loss_weight': 2,       # weight of the value (critic) loss
    'entropy_weight': 0.0005,     # entropy bonus coefficient
    'gamma': 0.98,                # discount factor
    'action_scale': 2,            # scales actions to the environment's action range
    'max_grad_norm_actor': 2.0,   # gradient-norm clipping for the actor
    'max_grad_norm_critic': 2.0,  # gradient-norm clipping for the critic
    'critic_lr': 1e-3,
    'actor_lr': 1e-3,
}
agent = Agent(task.state_size, task.action_size, hidden_layers=(300, 300), config=config)
env_runner = EnvRunner(task, agent)

env_runner.interact_episode(0, render=True)  # render one episode (eps=0) before training
scores = env_runner.run(80, 4000)  # train; presumably the reward goal (80) and the episode cap (4000)
env_runner.interact_episode(0, render=True)  # render one episode again after training

# plot the scores
fig = plt.figure()
ax = fig.add_subplot(111)
plt.plot(np.arange(len(scores)), scores)
plt.ylabel('Score')
plt.xlabel('Episode #')
plt.savefig(f'{env_name}.png', dpi=120)
plt.show()