# Kick off a first episode and read the initial game state.
game.new_episode()
game_state = game.get_state()
# Game variables are laid out as [KILLCOUNT, AMMO, HEALTH]. Both names start
# out bound to the same reading.
misc = prev_misc = game_state.game_variables

action_size = game.get_available_buttons_size()

# Network input geometry: images are converted to black and white, and
# 4 frames are stacked along the channel axis.
img_rows = 64
img_cols = 64
img_channels = 4
state_size = (img_rows, img_cols, img_channels)

# Create the agent, then attach its actor and critic networks, each built
# with the learning rate stored on the agent.
agent = A2CAgent(state_size, action_size)
agent.actor = Networks.actor_network(
    state_size,
    action_size,
    agent.actor_lr,
)
agent.critic = Networks.critic_network(
    state_size,
    agent.value_size,
    agent.critic_lr,
)

# Start training.
# NOTE(review): GAME and t look like episode / step counters -- confirm
# against the code that updates them.
GAME = 0
t = 0
max_life = 0  # Maximum episode life (proxy for agent performance).

# Buffers used to compute rolling statistics.
life_buffer = []
ammo_buffer = []
kills_buffer = []

for i in range(max_episodes):
    # Each iteration starts a new episode and re-reads the game variables,
    # resetting both the current and the previous reading.
    game.new_episode()
    game_state = game.get_state()
    misc = prev_misc = game_state.game_variables