def test_gray_scale_observation(env_id, keep_dim):
    gray_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=True)
    rgb_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=False)
    wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim)
    assert rgb_env.observation_space.shape[-1] == 3

    seed = 0
    gray_env.seed(seed)
    wrapped_env.seed(seed)

    gray_obs = gray_env.reset()
    wrapped_obs = wrapped_env.reset()

    if keep_dim:
        assert wrapped_env.observation_space.shape[-1] == 1
        assert len(wrapped_obs.shape) == 3
        wrapped_obs = wrapped_obs.squeeze(-1)
    else:
        assert len(wrapped_env.observation_space.shape) == 2
        assert len(wrapped_obs.shape) == 2

    # ALE grayscale differs slightly, but by no more than one shade
    assert np.allclose(gray_obs.astype("int32"), wrapped_obs.astype("int32"), atol=1)
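# A minimal direct invocation of the test above; in the gym test suite the
# env_id / keep_dim arguments come from pytest parametrization, and the Atari
# env id below is only an illustrative choice (it needs the Atari ROMs installed).
import gym
import numpy as np
from gym.wrappers import AtariPreprocessing, GrayScaleObservation

test_gray_scale_observation("PongNoFrameskip-v4", keep_dim=True)
test_gray_scale_observation("PongNoFrameskip-v4", keep_dim=False)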
def __init__(self, episodes):
    self.current_episode = 0
    self.episodes = episodes
    self.episode_score = []
    self.episode_qs = []
    self.episode_distance = []
    self.episode_loss = []

    self.fig, self.ax = plt.subplots(2, 2)
    self.fig.canvas.draw()
    plt.show(block=False)

    self.env = gym_super_mario_bros.make('SuperMarioBros-v0')
    # Apply Observation Wrappers
    self.env = GrayScaleObservation(self.env)
    self.env = ResizeObservation(self.env, 84)
    # Apply Control Wrappers
    self.env = JoypadSpace(self.env, SIMPLE_MOVEMENT)
    self.env = NoopResetEnv(self.env)
    # Apply Frame Wrappers
    self.env = SkipFrame(self.env, 4)
    self.env = FrameStack(self.env, 4)

    self.agent = DQNAgent(stateShape=(84, 84, 4),
                          actionSpace=self.env.action_space,
                          numPicks=32,
                          memorySize=100000)
def __init__(self, episodes, checkpoint, current_episode, epsilon):
    self.current_episode = current_episode
    self.episodes = episodes
    self.episode_score = []
    self.episode_qs = []
    self.episode_distance = []
    self.episode_loss = []
    self.episode_policies = []

    self.fig, self.ax = plt.subplots(1, 2, figsize=(12, 4))
    self.fig.canvas.draw()

    self.env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')
    # Apply Observation Wrappers
    self.env = GrayScaleObservation(self.env)
    self.env = ResizeObservation(self.env, 84)
    # Apply Control Wrappers
    self.env = JoypadSpace(self.env, SIMPLE_MOVEMENT)
    self.env = NoopResetEnv(self.env)
    # Apply Frame Wrappers
    self.env = SkipFrame(self.env, 4)
    self.env = FrameStack(self.env, 4)

    self.agent = DQNAgent(stateShape=(4, 84, 84),
                          actionSpace=self.env.action_space,
                          numPicks=32,
                          memorySize=20000,
                          numRewards=4,
                          epsilon=epsilon,
                          checkpoint=checkpoint)
def __init__(self, env_name):
    env = gym.make(env_name)
    self.env = GrayScaleObservation(env)
    self.env = NormalizeObservation(self.env)
    self.env = FrameStack(self.env, 4)
    gym.Wrapper.__init__(self, self.env)
def _inner() -> gym.Env:
    env = gym.make(ENV_NAME, verbose=0)
    env.seed(seed)
    if not is_eval:
        env = Monitor(env, run_dir)
    env = GrayScaleObservation(env, keep_dim=True)
    if frame_skip > 0:
        env = MaxAndSkipEnv(env, skip=frame_skip)
    return env
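# A minimal sketch of how a factory like _inner is typically consumed; the
# concrete ENV_NAME, the module-level closure variables (seed, is_eval,
# run_dir, frame_skip), and the use of stable_baselines3 here are assumptions
# for illustration, not the original project's configuration.
from gym.wrappers import GrayScaleObservation
from stable_baselines3.common.atari_wrappers import MaxAndSkipEnv
from stable_baselines3.common.vec_env import DummyVecEnv

ENV_NAME = "CarRacing-v0"        # assumed; any image-observation env would do
seed, is_eval, run_dir, frame_skip = 0, True, "./runs", 4

vec_env = DummyVecEnv([_inner])  # DummyVecEnv expects a list of env factories
obs = vec_env.reset()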
def wrapper(env):
    # Skip to every 4th frame; drop redundant frames to speed up training
    env = SkipEnv(env, skip=4)
    # RGB to grayscale: reduces the input dimension and thus the model size
    env = GrayScaleObservation(env, keep_dim=False)
    # Resize to 84 x 84: reduces the input dimension and thus the model size
    env = ResizeObservation(env, shape=84)
    # Make the observation a stack of the last 4 frames; consecutive frames
    # are needed to differentiate landing vs. taking off
    env = FrameStack(env, num_stack=4)
    return env
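# A minimal usage sketch of the pipeline above; SkipEnv is assumed to be the
# project's own frame-skipping wrapper, and BreakoutNoFrameskip-v4 only stands
# in as an illustrative image-observation environment.
import gym

env = wrapper(gym.make("BreakoutNoFrameskip-v4"))
obs = env.reset()
print(env.observation_space.shape)  # expected (4, 84, 84): 4 stacked 84x84 grayscale frames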
def setup_wrappers(env):
    obs_shape = env.observation_space.shape
    is_image = len(obs_shape) == 3
    if is_image:
        from gym.wrappers import GrayScaleObservation
        from gym.wrappers import FlattenObservation
        from gym.wrappers import ResizeObservation
        env = GrayScaleObservation(env)
        # env = ResizeObservation(env, (obs_shape[0]//3, obs_shape[0]//3))
        env = FlattenObservation(env)
    return env
class Breakout(AbstractGame):
    """The Gym Breakout environment."""

    def __init__(self, discount: float):
        super().__init__(discount)
        self.env = gym.make('BreakoutDeterministic-v4')
        self.env = ResizeObservation(self.env, shape=(84, 84))
        self.env = GrayScaleObservation(self.env, keep_dim=True)
        self.actions = list(map(lambda i: Action(i), range(self.env.action_space.n)))
        self.observations = [self.env.reset()]
        self.done = False

    @property
    def action_space_size(self) -> int:
        """Return the size of the action space."""
        return len(self.actions)

    def step(self, action) -> int:
        """Execute one step of the game conditioned by the given action."""
        observation, reward, done, _ = self.env.step(action.index)
        self.observations += [observation]
        self.done = done
        return reward

    def terminal(self) -> bool:
        """Is the game finished?"""
        return self.done

    def legal_actions(self) -> List[Action]:
        """Return the legal actions available at this instant."""
        return self.actions

    def make_image(self, state_index: int):
        """Compute the state of the game."""
        return self.observations[state_index]
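# A minimal sketch of driving the Breakout game above with a random policy;
# Action and AbstractGame come from the surrounding codebase, so the discount
# value used here is purely illustrative.
import random

game = Breakout(discount=0.997)
while not game.terminal():
    action = random.choice(game.legal_actions())
    reward = game.step(action)
final_frame = game.make_image(len(game.observations) - 1)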
def create_env(env_name="SuperMarioBros-1-1-v0"):
    env = gym_super_mario_bros.make(env_name)
    # Restrict the action space to only "right" and "jump + right"
    env = JoypadSpace(env, [["right"], ["right", "A"]])
    # Repeat each action for 4 frames, accumulating the rewards
    env = SkipFrame(env, skip=4)
    # Transform the RGB image to grayscale, [240, 256]
    env = GrayScaleObservation(env)
    # Downsample to the new size, [1, 84, 84]
    env = ResizeObservation(env, shape=84)
    # Scale pixel values from [0, 255] to [0, 1] floats
    env = TransformObservation(env, f=lambda x: x / 255.)
    # Squash 4 consecutive frames of the environment into a single
    # observation point to feed to our learning model, [4, 84, 84]
    env = FrameStack(env, num_stack=4)
    return env
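# SkipFrame is defined elsewhere in these projects; a common implementation
# looks roughly like the sketch below (old gym step API) -- treat it as an
# assumption rather than the project's exact code.
import gym

class SkipFrame(gym.Wrapper):
    def __init__(self, env, skip):
        super().__init__(env)
        self._skip = skip

    def step(self, action):
        # Repeat the same action for `skip` frames and sum the rewards
        total_reward = 0.0
        obs, done, info = None, False, {}
        for _ in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return obs, total_reward, done, info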
import datetime
from pathlib import Path

import torch
import gym_super_mario_bros
from gym.wrappers import FrameStack, GrayScaleObservation, TransformObservation
from nes_py.wrappers import JoypadSpace

from metrics import MetricLogger
from agent import Mario
from wrappers import ResizeObservation, SkipFrame

# Initialize Super Mario environment
env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')

# Limit the action-space to
#   0. walk right
#   1. jump right
env = JoypadSpace(env, [['right'], ['right', 'A']])

# Apply Wrappers to environment
env = SkipFrame(env, skip=4)
env = GrayScaleObservation(env, keep_dim=False)
env = ResizeObservation(env, shape=84)
env = TransformObservation(env, f=lambda x: x / 255.)
env = FrameStack(env, num_stack=4)

env.reset()

save_dir = Path('checkpoints') / datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S')
save_dir.mkdir(parents=True)

checkpoint = None  # Path('checkpoints/2020-10-21T18-25-27/mario.chkpt')

# Check whether a GPU is available (BM)
if torch.cuda.is_available():
    device = torch.device("cuda:0")
import datetime
from pathlib import Path

import gym
import gym_super_mario_bros
from gym.wrappers import FrameStack, GrayScaleObservation, TransformObservation
from nes_py.wrappers import JoypadSpace

from metrics import MetricLogger
from agent import Mario
from wrappers import ResizeObservation, SkipFrame

env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')
env = JoypadSpace(env, [['right'], ['right', 'A']])

env = SkipFrame(env, skip=4)
env = GrayScaleObservation(env)
env = ResizeObservation(env, shape=84)
env = FrameStack(env, num_stack=4)
env.reset()

save_dir = Path('checkpoints_evaluate') / datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S')
save_dir.mkdir(parents=True)

checkpoint = Path('mario_net.chkpt')
mario = Mario(state_dim=(4, 84, 84), action_dim=env.action_space.n,
              save_dir=save_dir, checkpoint=checkpoint)
mario.exploration_rate = mario.exploration_rate_min
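# A minimal evaluation-loop sketch to go with the script above; Mario.act()
# and the 'flag_get' info key are assumed from the imported agent module and
# gym_super_mario_bros, and the episode count is only illustrative.
episodes = 10
for e in range(episodes):
    state = env.reset()
    while True:
        action = mario.act(state)  # action from the loaded checkpoint, mostly greedy
        next_state, reward, done, info = env.step(action)
        state = next_state
        if done or info.get('flag_get'):
            break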
epsilon_start = 0.0

parser = argparse.ArgumentParser(description=None)
# Question 1
# Change the var called environment_used to change the environment
parser.add_argument('env_id', nargs='?', default=environment_used,
                    help='Select the environment to run')
args = parser.parse_args()

# You can set the level to logger.DEBUG or logger.WARN if you
# want to change the amount of output.
logger.set_level(logger.INFO)

env = gym.make(args.env_id)

if environment_used != "CartPole-v1":
    env = GrayScaleObservation(env)
    env = ResizeObservation(env, resolution)
    env = FrameStack(env, 4)
    agent = VizdoomAgent(env.action_space, resolution, eta, test_mode, environment_used)
else:
    agent = RandomAgent(env.action_space, env.observation_space, eta, test_mode, environment_used)

# You provide the directory to write to (can be an existing
# directory, including one with existing data -- all monitor files
# will be namespaced). You can also dump to a tempdir if you'd
# like: tempfile.mkdtemp().
outdir = '/tmp/random-agent-results'
env = wrappers.Monitor(env, directory=outdir, force=True)
env.seed(0)

mem = Memory()