def test_gym_task_reset(mock_gym, fix_env):
    """GymTask.reset() should call the wrapped env's reset exactly once
    and return a non-empty initial state.

    `mock_gym` / `fix_env` are pytest fixtures: `mock_gym` stands in for
    the gym module, `fix_env` for the environment it creates.
    """
    # Assign
    mock_gym.make.return_value = fix_env
    task = GymTask("example")

    # Act
    out = task.reset()

    # Assert
    # BUG FIX: the original used `fix_env.reset.called_once()`, which is
    # NOT a Mock assertion — accessing an undefined attribute on a Mock
    # creates a child mock, and calling it returns a truthy Mock, so the
    # assert could never fail. `assert_called_once()` actually raises
    # when the call count differs from one.
    fix_env.reset.assert_called_once()
    assert len(out) > 0
def state_transform(state):
    """Crop the top/bottom edges of a frame and binarize it.

    Assumes `state` is an (H, W, C) array with numpy semantics (slicing
    and `.sum(-1)`) — TODO confirm against GymTask's output.
    Returns a boolean array of shape (1, H - 50, W): rows 40..-10 kept,
    channels collapsed, any non-zero pixel mapped to True; the leading
    `None` adds a channel axis.
    """
    return (state[40:-10].sum(-1) > 0)[None, ...]


def agent_state_tranform(state):
    """Identity transform applied on the agent side.

    NOTE(review): the name misspells "transform"; kept as-is for
    backward compatibility with any external references.
    """
    return state


env_name = 'Breakout-v0'
task = GymTask(env_name, state_transform=state_transform)
state_size = np.array(task.reset()).shape
writer = SummaryWriter()

# Agent hyperparameters.
# BUG FIX: the original dict literal repeated the keys "update_freq"
# (10 then 150), "batch_size" (100 then 400) and "lr" (1e-4 then 1e-3);
# Python silently keeps the last value, so the earlier entries were dead
# and misleading. Only the effective values are kept, in the keys'
# original first-occurrence order, so the resulting dict is identical.
config = {
    "update_freq": 150,
    "batch_size": 400,
    "warm_up": 100,
    "lr": 1e-3,
    "network_fn": lambda: QNetwork2D(state_size, task.action_size, hidden_layers=(200, 200)),
    "state_transform": agent_state_tranform,
    "gamma": 0.99,
    "buffer_size": int(5e3),
    "device": "cuda",
}

# Warm up the env with a few resets.
# NOTE(review): `prev_states` is not defined in this chunk — presumably
# set earlier in the file; verify.
for _ in range(prev_states):
    task.reset()

agent = DQNAgent(task.state_size, task.action_size, **config)
# NOTE(review): `data_logger` is also defined outside this chunk.
env_runner = EnvRunner(task, agent, data_logger=data_logger)

# env_runner.interact_episode(0, render=True)
scores = env_runner.run(
    reward_goal=1000,
    max_episodes=20000,
    log_every=1,
    eps_start=0.9,
    gif_every_episodes=200,
    force_new=True,
)
# env_runner.interact_episode(render=True)
data_logger.close()