import gym
import numpy as np
from gym import spaces

from minatar import Environment


class BaseEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, display_time=50, **kwargs):
        self.game_name = 'Game Name'  # placeholder; subclasses set the actual MinAtar game name
        self.display_time = display_time
        self.init(**kwargs)

    def init(self, **kwargs):
        # Build the underlying MinAtar environment and expose its action and
        # observation spaces through the Gym interface.
        self.game = Environment(env_name=self.game_name, **kwargs)
        self.action_set = self.game.env.action_map
        self.action_space = spaces.Discrete(self.game.num_actions())
        self.observation_space = spaces.Box(0.0, 1.0, shape=self.game.state_shape(), dtype=np.float32)

    def step(self, action):
        # Advance the game one frame and return the standard Gym transition tuple.
        reward, done = self.game.act(action)
        return (self.game.state(), reward, done, {})

    def reset(self):
        self.game.reset()
        return self.game.state()

    def seed(self, seed=None):
        # Re-create the underlying environment with the given random seed.
        self.game = Environment(env_name=self.game_name, random_seed=seed)
        return seed

    def render(self, mode='human'):
        if mode == 'rgb_array':
            return self.game.state()
        elif mode == 'human':
            self.game.display_state(self.display_time)

    def close(self):
        if self.game.visualized:
            self.game.close_display()
        return 0
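For reference, here is how the wrapper might be used. This is a minimal sketch, not part of the wrapper itself: the subclass name BreakoutEnv and the game name 'breakout' are illustrative assumptions, since BaseEnv leaves game_name as a placeholder for subclasses to fill in.

class BreakoutEnv(BaseEnv):
    # Hypothetical subclass: override __init__ so that game_name is set
    # before init() constructs the underlying MinAtar Environment.
    def __init__(self, display_time=50, **kwargs):
        self.game_name = 'breakout'  # assumed game; any MinAtar game name works
        self.display_time = display_time
        self.init(**kwargs)

env = BreakoutEnv()
obs = env.reset()                                   # initial binary state array
obs, reward, done, info = env.step(env.action_space.sample())
env.close()

Because game_name is read inside init(), the subclass must assign it before calling init() rather than delegating to BaseEnv.__init__, which would construct the environment with the placeholder name.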
import random

import numpy

from minatar import Environment

# Set-up (assumed, not shown in the original snippet): 'breakout' and
# NUM_EPISODES = 100 are arbitrary choices; any MinAtar game name works here.
NUM_EPISODES = 100
env = Environment('breakout')
num_actions = env.num_actions()
returns = []
e = 0

while e < NUM_EPISODES:
    G = 0.0
    # Initialize the environment
    env.reset()
    terminated = False
    # Obtain first state, unused by random agent, but included for illustration
    s = env.state()

    while not terminated:
        # Select an action uniformly at random
        action = random.randrange(num_actions)
        # Act according to the action and observe the transition and reward
        reward, terminated = env.act(action)
        # Obtain s_prime, unused by random agent, but included for illustration
        s_prime = env.state()
        # Display the current state for 50 ms
        env.display_state(50)
        G += reward

    # Increment the episodes
    e += 1
    # Store the return for each episode
    returns.append(G)

print("Avg Return: " + str(numpy.mean(returns)) + "+/-" + str(numpy.std(returns) / numpy.sqrt(NUM_EPISODES)))
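The printed interval is the mean return plus or minus its standard error (the standard deviation divided by the square root of the number of episodes). The same evaluation can also be written against the Gym wrapper above; the sketch below assumes a subclass like the hypothetical BreakoutEnv, and skips rendering so the loop runs much faster than with display_state.

import numpy as np

def evaluate_random(env, num_episodes=100):
    # Run a uniform-random policy and report the mean return and its standard error.
    returns = []
    for _ in range(num_episodes):
        env.reset()
        done, G = False, 0.0
        while not done:
            _, reward, done, _ = env.step(env.action_space.sample())
            G += reward
        returns.append(G)
    return np.mean(returns), np.std(returns) / np.sqrt(num_episodes)

avg, stderr = evaluate_random(BreakoutEnv(), num_episodes=100)
print("Avg Return: " + str(avg) + "+/-" + str(stderr))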