Example #1
import gym
import numpy as np
import pytest
from gym.wrappers import AtariPreprocessing, GrayScaleObservation


# Parametrization assumed for illustration; the original test's env list is not shown.
@pytest.mark.parametrize("env_id", ["PongNoFrameskip-v4"])
@pytest.mark.parametrize("keep_dim", [True, False])
def test_gray_scale_observation(env_id, keep_dim):
    gray_env = AtariPreprocessing(gym.make(env_id),
                                  screen_size=84,
                                  grayscale_obs=True)
    rgb_env = AtariPreprocessing(gym.make(env_id),
                                 screen_size=84,
                                 grayscale_obs=False)
    wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim)
    assert rgb_env.observation_space.shape[-1] == 3

    seed = 0
    gray_env.seed(seed)
    wrapped_env.seed(seed)

    gray_obs = gray_env.reset()
    wrapped_obs = wrapped_env.reset()

    if keep_dim:
        assert wrapped_env.observation_space.shape[-1] == 1
        assert len(wrapped_obs.shape) == 3
        wrapped_obs = wrapped_obs.squeeze(-1)
    else:
        assert len(wrapped_env.observation_space.shape) == 2
        assert len(wrapped_obs.shape) == 2

    # ALE's grayscale conversion is slightly different, but by no more than one shade
    assert np.allclose(gray_obs.astype("int32"),
                       wrapped_obs.astype("int32"),
                       atol=1)
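A minimal sketch (not from any of these projects; it assumes gym and the Atari ROMs are installed) of what keep_dim changes:

import gym
from gym.wrappers import GrayScaleObservation

env = gym.make("PongNoFrameskip-v4")  # any env with RGB image observations works
print(env.observation_space.shape)                                        # (210, 160, 3)
print(GrayScaleObservation(env, keep_dim=False).observation_space.shape)  # (210, 160)
print(GrayScaleObservation(env, keep_dim=True).observation_space.shape)   # (210, 160, 1)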
Example #2
 def __init__(self, discount: float):
     super().__init__(discount)
     self.env = gym.make('BreakoutDeterministic-v4')
     self.env = ResizeObservation(self.env, shape=(84, 84))
     self.env = GrayScaleObservation(self.env, keep_dim=True)
     self.actions = list(
         map(lambda i: Action(i), range(self.env.action_space.n)))
     self.observations = [self.env.reset()]
     self.done = False
Example #3
    def __init__(self, episodes):
        self.current_episode = 0
        self.episodes = episodes

        self.episode_score = []
        self.episode_qs = []
        self.episode_distance = []
        self.episode_loss = []

        self.fig, self.ax = plt.subplots(2, 2)
        self.fig.canvas.draw()
        plt.show(block=False)

        self.env = gym_super_mario_bros.make('SuperMarioBros-v0')
        # Apply Observation Wrappers
        self.env = GrayScaleObservation(self.env)
        self.env = ResizeObservation(self.env, 84)
        # Apply Control Wrappers
        self.env = JoypadSpace(self.env, SIMPLE_MOVEMENT)
        self.env = NoopResetEnv(self.env)
        # Apply Frame Wrappers
        self.env = SkipFrame(self.env, 4)
        self.env = FrameStack(self.env, 4)

        self.agent = DQNAgent(stateShape=(84, 84, 4),
                              actionSpace=self.env.action_space, numPicks=32, memorySize=100000)
Example #4
    def __init__(self, episodes, checkpoint, current_episode, epsilon):
        self.current_episode = current_episode
        self.episodes = episodes

        self.episode_score = []
        self.episode_qs = []
        self.episode_distance = []
        self.episode_loss = []
        self.episode_policies = []

        self.fig, self.ax = plt.subplots(1, 2, figsize=(12, 4))
        self.fig.canvas.draw()

        self.env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')
        # Apply Observation Wrappers
        self.env = GrayScaleObservation(self.env)
        self.env = ResizeObservation(self.env, 84)
        # Apply Control Wrappers
        self.env = JoypadSpace(self.env, SIMPLE_MOVEMENT)
        self.env = NoopResetEnv(self.env)
        # Apply Frame Wrappers
        self.env = SkipFrame(self.env, 4)
        self.env = FrameStack(self.env, 4)

        self.agent = DQNAgent(stateShape=(4, 84, 84),
                              actionSpace=self.env.action_space,
                              numPicks=32,
                              memorySize=20000,
                              numRewards=4,
                              epsilon=epsilon,
                              checkpoint=checkpoint)
Example #5
    def __init__(self, env_name):
        env = gym.make(env_name)

        self.env = GrayScaleObservation(env)
        self.env = NormalizeObservation(self.env)
        self.env = FrameStack(self.env, 4)

        gym.Wrapper.__init__(self, self.env)
Example #6
 def _inner() -> gym.Env:
     env = gym.make(ENV_NAME, verbose=0)
     env.seed(seed)
     if not is_eval:
         env = Monitor(env, run_dir)
     env = GrayScaleObservation(env, keep_dim=True)
     if frame_skip > 0:
         env = MaxAndSkipEnv(env, skip=frame_skip)
     return env
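A zero-argument factory like _inner is what vectorized-environment helpers expect; for instance, with Stable-Baselines3 (an assumption here, suggested by the Monitor import):

from stable_baselines3.common.vec_env import DummyVecEnv

vec_env = DummyVecEnv([_inner])  # pass several factories to run environments in parallel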
Example #7
def wrapper(env):
    # Skip to every 4th frame; consecutive frames are largely redundant, so this speeds up training
    env = SkipEnv(env, skip=4)
    # RGB to grayscale; reduces the input dimensionality and thus the model size
    env = GrayScaleObservation(env, keep_dim=False)
    # Resize to 84 x 84; reduces the input dimensionality and thus the model size
    env = ResizeObservation(env, shape=84)
    # Make the observation a stack of the 4 most recent frames; consecutive
    # frames are needed to differentiate landing vs. taking off
    env = FrameStack(env, num_stack=4)
    return env
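A hypothetical look at what this pipeline does to the observation space, assuming an Atari-style RGB env and that the custom SkipEnv leaves the observation space unchanged:

import gym

env = wrapper(gym.make("BreakoutNoFrameskip-v4"))
print(env.observation_space.shape)  # (4, 84, 84): four stacked 84x84 grayscale frames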
Example #8
def setup_wrappers(env):
    obs_shape = env.observation_space.shape
    is_image = len(obs_shape) == 3
    if is_image:
        from gym.wrappers import GrayScaleObservation
        from gym.wrappers import FlattenObservation
        from gym.wrappers import ResizeObservation

        env = GrayScaleObservation(env)
        # env = ResizeObservation(env, (obs_shape[0]//3, obs_shape[0]//3))
        env = FlattenObservation(env)

    return env
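A hypothetical check of the resulting space, assuming an env with RGB image observations such as CarRacing:

import gym

env = setup_wrappers(gym.make("CarRacing-v0"))  # raw observations: (96, 96, 3)
print(env.observation_space.shape)              # (9216,): 96*96 grayscale pixels, flattened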
Example #9
class Breakout(AbstractGame):
    """The Gym CartPole environment"""
    def __init__(self, discount: float):
        super().__init__(discount)
        self.env = gym.make('BreakoutDeterministic-v4')
        self.env = ResizeObservation(self.env, shape=(84, 84))
        self.env = GrayScaleObservation(self.env, keep_dim=True)
        self.actions = list(
            map(lambda i: Action(i), range(self.env.action_space.n)))
        self.observations = [self.env.reset()]
        self.done = False

    @property
    def action_space_size(self) -> int:
        """Return the size of the action space."""
        return len(self.actions)

    def step(self, action) -> int:
        """Execute one step of the game conditioned by the given action."""

        observation, reward, done, _ = self.env.step(action.index)
        self.observations += [observation]
        self.done = done
        return reward

    def terminal(self) -> bool:
        """Is the game is finished?"""
        return self.done

    def legal_actions(self) -> List[Action]:
        """Return the legal actions available at this instant."""
        return self.actions

    def make_image(self, state_index: int):
        """Compute the state of the game."""
        return self.observations[state_index]
Example #10
def create_env(env_name="SuperMarioBros-1-1-v0"):
    env = gym_super_mario_bros.make(env_name)

    # Restricts action space to only "right" and "jump + right"
    env = JoypadSpace(env, [["right"], ["right", "A"]])
    # Accumulates rewards every 4th frame
    env = SkipFrame(env, skip=4)
    # Transform the RGB image to grayscale, [240, 256]
    env = GrayScaleObservation(env)
    # Downsample to the new size, [84, 84]
    env = ResizeObservation(env, shape=84)
    # Normalize pixel values from [0, 255] to [0, 1] (casts to float)
    env = TransformObservation(env, f=lambda x: x / 255.)
    # Stack 4 consecutive frames of the environment into a
    # single observation to feed to the learning model, [4, 84, 84]
    env = FrameStack(env, num_stack=4)
    return env
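A hypothetical smoke test of create_env, assuming gym_super_mario_bros is installed and the custom SkipFrame behaves as its comment describes:

import numpy as np

env = create_env()
obs = env.reset()
frames = np.asarray(obs)           # FrameStack returns LazyFrames; convert to an ndarray
print(frames.shape, frames.dtype)  # (4, 84, 84) float64, due to the /255. transform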
Example #11
import datetime
from pathlib import Path

import torch
import gym_super_mario_bros
from gym.wrappers import FrameStack, GrayScaleObservation, TransformObservation
from nes_py.wrappers import JoypadSpace

from metrics import MetricLogger
from agent import Mario
from wrappers import ResizeObservation, SkipFrame

# Initialize Super Mario environment
env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')

# Limit the action-space to
#   0. walk right
#   1. jump right
env = JoypadSpace(env, [['right'], ['right', 'A']])

# Apply Wrappers to environment
env = SkipFrame(env, skip=4)
env = GrayScaleObservation(env, keep_dim=False)
env = ResizeObservation(env, shape=84)
env = TransformObservation(env, f=lambda x: x / 255.)
env = FrameStack(env, num_stack=4)

env.reset()

save_dir = Path('checkpoints') / datetime.datetime.now().strftime(
    '%Y-%m-%dT%H-%M-%S')
save_dir.mkdir(parents=True)

checkpoint = None  # Path('checkpoints/2020-10-21T18-25-27/mario.chkpt')

# Check whether a GPU is available (BM)
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
Example #12
import datetime
from pathlib import Path

import gym
import gym_super_mario_bros
from gym.wrappers import FrameStack, GrayScaleObservation, TransformObservation
from nes_py.wrappers import JoypadSpace

from metrics import MetricLogger
from agent import Mario
from wrappers import ResizeObservation, SkipFrame

env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')

env = JoypadSpace(env, [['right'], ['right', 'A']])

env = SkipFrame(env, skip=4)
env = GrayScaleObservation(env)
env = ResizeObservation(env, shape=84)
env = FrameStack(env, num_stack=4)

env.reset()

save_dir = Path('checkpoints_evaluate') / datetime.datetime.now().strftime(
    '%Y-%m-%dT%H-%M-%S')
save_dir.mkdir(parents=True)

checkpoint = Path('mario_net.chkpt')
mario = Mario(state_dim=(4, 84, 84),
              action_dim=env.action_space.n,
              save_dir=save_dir,
              checkpoint=checkpoint)
mario.exploration_rate = mario.exploration_rate_min
Example #13
        epsilon_start = 0.0

    parser = argparse.ArgumentParser(description=None)
    # Question 1
    # Change the var called environment_used to change the environment
    parser.add_argument('env_id', nargs='?', default=environment_used, help='Select the environment to run')
    args = parser.parse_args()
    
    # You can set the level to logger.DEBUG or logger.WARN if you
    # want to change the amount of output.
    logger.set_level(logger.INFO)

    env = gym.make(args.env_id)

    if environment_used != "CartPole-v1":
        env = GrayScaleObservation(env)
        env = ResizeObservation(env, resolution)
        env = FrameStack(env, 4)
        agent = VizdoomAgent(env.action_space, resolution, eta, test_mode, environment_used)
    else:
        agent = RandomAgent(env.action_space, env.observation_space, eta, test_mode, environment_used)

    # You provide the directory to write to (can be an existing
    # directory, including one with existing data -- all monitor files
    # will be namespaced). You can also dump to a tempdir if you'd
    # like: tempfile.mkdtemp().
    outdir = '/tmp/random-agent-results'
    env = wrappers.Monitor(env, directory=outdir, force=True)
    env.seed(0)

    mem = Memory()