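The setup below hands a nv_state_preprocessor callable to PLE, which turns Catcher's raw game state into a flat vector for the agent; PLE applies it whenever getGameState() is called. If that function is not defined earlier in this example, a minimal sketch might look like the following (the normalisation constants and the assumption that Catcher's state dict holds four scalars, player_x, player_vel, fruit_x and fruit_y, are illustrative guesses, not taken from the original):

import numpy as np

def nv_state_preprocessor(state):
    # state is assumed to be Catcher's dict of four scalars
    # (player_x, player_vel, fruit_x, fruit_y); scale each into roughly
    # [0, 1] for a 128x128 screen. The divisors are assumed values.
    max_values = np.array([128.0, 20.0, 128.0, 128.0])
    return np.array(list(state.values()), dtype=np.float32) / max_values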
# memory settings
max_memory_size = 100000
min_memory_size = 1000  # number needed before model training starts

epsilon_rate = (epsilon - epsilon_min) / epsilon_steps

# PLE takes our game and the state_preprocessor. It will process the state
# for our agent.
game = Catcher(width=128, height=128)
env = PLE(game, fps=60, state_preprocessor=nv_state_preprocessor)

agent = Agent(env, batch_size, num_frames, frame_skip, lr,
              discount, rng, optimizer="sgd_nesterov")
agent.build_model()

memory = ReplayMemory(max_memory_size, min_memory_size)

env.init()

for epoch in range(1, num_epochs + 1):
    steps, num_episodes = 0, 0
    losses, rewards = [], []
    env.display_screen = False

    # training loop
    while steps < num_steps_train:
        episode_reward = 0.0
        agent.start_episode()

        while not env.game_over() and steps < num_steps_train:
            state = env.getGameState()