def test_multi_sync_env_runner_save_state(mock_json, mock_path):
    """Saving runner state delegates to the agent and persists run counters."""
    # Arrange: spy on the agent's save hook and run a short session.
    test_agent.save_state = mock.MagicMock()
    runner = MultiSyncEnvRunner([test_task], test_agent)
    runner.run(max_episodes=10, max_iterations=10)

    # Act: save while file access is patched out.
    with mock.patch('builtins.open'):
        runner.save_state('saved_state.state')

    # Assert: agent state was saved and the dumped dict reflects the run.
    test_agent.save_state.assert_called_once()
    dumped_state = mock_json.dump.call_args[0][0]
    assert dumped_state['episode'] == 10
    assert dumped_state['tot_iterations'] == 10 * 10
def test_multi_sync_env_runner_log_data_interaction_log_after_episode(
        mock_data_logger):
    """A completed episode triggers a partial (non-full) metrics log on the agent."""
    # Arrange: spy on the agent's metrics hook and attach the data logger.
    test_agent.log_metrics = mock.MagicMock()
    runner = MultiSyncEnvRunner([test_task], test_agent,
                                data_logger=mock_data_logger)

    # Act: a single episode capped at 10 iterations.
    runner.run(max_episodes=1, max_iterations=10)

    # Assert: one partial metrics log, no dict logs, four scalar logs.
    test_agent.log_metrics.assert_called_once_with(mock_data_logger, 10,
                                                   full_log=False)
    assert mock_data_logger.log_values_dict.call_count == 0
    assert mock_data_logger.log_value.call_count == 4
def test_multi_sync_env_runner_run_single_step_single_task():
    """One task stepped once yields exactly one score."""
    # Arrange
    runner = MultiSyncEnvRunner([test_task], test_agent)

    # Act
    scores = runner.run(max_episodes=1, max_iterations=1, force_new=True)

    # Assert: a single iteration cannot terminate the episode,
    # so exactly one score is collected.
    assert len(scores) == 1
def test_multi_sync_env_runner_run_single_step_multiple_task():
    """Two tasks stepped once each produce one score apiece."""
    # Arrange: two identical tasks served by one multi-worker agent.
    tasks: List[TaskType] = [test_task, test_task]
    agent = PPOAgent(test_task.state_size, test_task.action_size,
                     num_workers=len(tasks))
    runner = MultiSyncEnvRunner(tasks, agent)

    # Act
    scores = runner.run(max_episodes=1, max_iterations=1, force_new=True)

    # Assert: both tasks hit the iteration cap at the same time,
    # so two scores come back.
    assert len(scores) == 2
def test_multi_sync_env_runner_run_multiple_step_multiple_task():
    """Two tasks over several episodes collect at least max_episodes scores."""
    # Arrange: two identical tasks served by one multi-worker agent.
    tasks: List[TaskType] = [test_task, test_task]
    agent = PPOAgent(test_task.state_size, test_task.action_size,
                     num_workers=len(tasks))
    runner = MultiSyncEnvRunner(tasks, agent)

    # Act
    scores = runner.run(max_episodes=3, max_iterations=100, force_new=True)

    # Assert: on rare occasions both tasks finish in the same step,
    # producing one extra score beyond max_episodes.
    assert len(scores) in (3, 4)
"using_kl_div": True, # "value_loss_weight": 2, "entropy_weight": 0.01, "gamma": 0.999, 'lambda_gae': 0.98, "critic_lr": 3e-4, "actor_lr": 3e-4, "action_scale": 1, "action_min": -20, "action_max": 20, } tasks: List[TaskType] = [GymTask(env_name) for _ in range(num_workers)] agent = Agent(tasks[0].state_size, tasks[0].action_size, hidden_layers=(100, 64, 64), **kwargs) env_runner = MultiSyncEnvRunner(tasks, agent, processes=processes, data_logger=data_logger) scores = env_runner.run(reward_goal=80, max_episodes=5000, force_new=True) # plot the scores fig = plt.figure() ax = fig.add_subplot(111) plt.plot(range(len(scores)), scores) plt.ylabel('Score') plt.xlabel('Episode #') plt.savefig(f'{env_name}.png', dpi=120) plt.show()