Example #1
from gym.spaces import Box
from gym.wrappers import TransformObservation
# FloatBox and IntBox are rlpyt's bounded space types (assumed origin).
from rlpyt.spaces.float_box import FloatBox
from rlpyt.spaces.int_box import IntBox


def env_creator(env_cls, env_config):
    env = env_cls(**env_config)
    if isinstance(env.observation_space, (Box, FloatBox)):
        # Rescale continuous observations to [-1, 1].
        obs_min = env.observation_space.low
        obs_max = env.observation_space.high
        env = TransformObservation(env, lambda obs: 2 * (obs - obs_min) / (obs_max - obs_min) - 1)
    elif isinstance(env.observation_space, IntBox):
        # Rescale integer observations to [0, 1].
        obs_min = env.observation_space.low
        obs_max = env.observation_space.high
        env = TransformObservation(env, lambda obs: (obs - obs_min) / (obs_max - obs_min))
    return env
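A minimal usage sketch for env_creator (hypothetical ToyEnv class, written against the pre-0.26 Gym API where reset returns only the observation):

import gym
import numpy as np
from gym.spaces import Box, Discrete

class ToyEnv(gym.Env):
    # Hypothetical environment with a bounded Box observation space.
    observation_space = Box(low=0.0, high=10.0, shape=(3,), dtype=np.float32)
    action_space = Discrete(2)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        return self.observation_space.sample(), 0.0, False, {}

env = env_creator(ToyEnv, {})
print(env.reset())  # observations rescaled into [-1, 1]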
Example #2
def test_transform_observation(env_id):
    affine_transform = lambda x: 3 * x + 2
    env = gym.make(env_id)
    wrapped_env = TransformObservation(gym.make(env_id),
                                       lambda obs: affine_transform(obs))

    obs = env.reset(seed=0)
    wrapped_obs = wrapped_env.reset(seed=0)
    assert np.allclose(wrapped_obs, affine_transform(obs))

    action = env.action_space.sample()
    obs, reward, done, _ = env.step(action)
    wrapped_obs, wrapped_reward, wrapped_done, _ = wrapped_env.step(action)
    assert np.allclose(wrapped_obs, affine_transform(obs))
    assert np.allclose(wrapped_reward, reward)
    assert wrapped_done == done
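This test targets the pre-0.26 Gym API, where reset(seed=...) returns just the observation and step returns a 4-tuple. A sketch of the same check against Gymnasium (assuming a release where TransformObservation still accepts just (env, f); newer versions also require the transformed observation_space):

import gymnasium as gym
import numpy as np
from gymnasium.wrappers import TransformObservation

def test_transform_observation_gymnasium(env_id="CartPole-v1"):
    affine_transform = lambda x: 3 * x + 2
    env = gym.make(env_id)
    wrapped_env = TransformObservation(gym.make(env_id), affine_transform)

    obs, _ = env.reset(seed=0)
    wrapped_obs, _ = wrapped_env.reset(seed=0)
    assert np.allclose(wrapped_obs, affine_transform(obs))

    action = env.action_space.sample()
    obs, reward, terminated, truncated, _ = env.step(action)
    (wrapped_obs, wrapped_reward, wrapped_terminated,
     wrapped_truncated, _) = wrapped_env.step(action)
    assert np.allclose(wrapped_obs, affine_transform(obs))
    assert np.allclose(wrapped_reward, reward)
    assert (wrapped_terminated, wrapped_truncated) == (terminated, truncated)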
Example #3
def make_atari_env(env_name):
    env = gym.make(env_name)
    env = FireReset(env)  # press FIRE on reset for games that require it
    env = AtariPreprocessing(env,
                             noop_max=30,
                             frame_skip=2,
                             screen_size=84,
                             terminal_on_life_loss=False,
                             grayscale_obs=True,
                             scale_obs=False)
    env = PyTorchImageWrapper(env)  # channel-first layout for PyTorch
    env = FrameStack(env, num_stack=4)  # keep the 4 most recent frames
    env = TransformObservation(env, f=np.array)  # LazyFrames -> ndarray
    env = ConcatWrapper(env, axis=0)  # merge stack and channel axes
    env = TransformObservation(
        env, f=lambda obs: np.asarray(obs, dtype=np.float32) / 255.0)  # scale to [0, 1]

    return env
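A quick smoke test for the resulting pipeline (hypothetical environment id; assumes the wrappers behave as commented above and that the Atari ROMs are installed):

env = make_atari_env("PongNoFrameskip-v4")
obs = env.reset()
print(np.asarray(obs).shape, np.asarray(obs).dtype)  # expected: (4, 84, 84) float32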
Example #4
def get_env(*args, **kwargs):
    # Base environment: a bsuite task (recorded to CSV) unless an explicit
    # Gym id was given.
    if gym_id:
        base_env = gym.make(gym_id)
    else:
        base_env = gym_wrapper.GymFromDMEnv(
            bsuite.load_and_record_to_csv(
                bsuite_id=bsuite_id,
                results_dir=results_dir,
                overwrite=True,
            ))
    env = FrameStack(base_env, num_stack=4)
    # Flatten the stack of frames into a single 1-D observation vector.
    env = TransformObservation(
        env, f=lambda lazy_frames: np.reshape(np.stack(lazy_frames._frames), -1))
    return GymEnvWrapper(env)
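The lambda above reaches into LazyFrames' private _frames attribute. In Gym versions where LazyFrames implements __array__, the same flattening can be written without touching internals (a sketch):

env = TransformObservation(
    env, f=lambda lazy_frames: np.asarray(lazy_frames).reshape(-1))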
Example #5
def make_env(seed, max_episode_steps=1500):
    env = WestWorld(
        seed=seed,
        room_size=2,
        obs_width=64,
        obs_height=64,
        max_episode_steps=max_episode_steps,
        decore_option=DecoreOption.PORTRAIT,
        num_chars_on_wall=1,
    )
    # Normalize pixel values from [0, 255] to [0, 1] as float32.
    env = TransformObservation(
        env, f=lambda obs: np.asarray(obs, dtype=np.float32) / 255.0)
    env = PyTorchImageWrapper(env)  # channel-first layout for PyTorch
    return env
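A hypothetical rollout with this constructor (assumes WestWorld emits 64x64x3 uint8 images, which the two wrappers turn into channel-first float32 values in [0, 1]):

env = make_env(seed=0)
obs = env.reset()
print(obs.shape, obs.dtype)  # expected: (3, 64, 64) float32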
Example #6
def create_env(env_name="SuperMarioBros-1-1-v0"):
    env = gym_super_mario_bros.make(env_name)

    # Restricts action space to only "right" and "jump + right"
    env = JoypadSpace(env, [["right"], ["right", "A"]])
    # Accumulates rewards every 4th frame
    env = SkipFrame(env, skip=4)
    # Convert the RGB frame to grayscale, [240, 256]
    env = GrayScaleObservation(env)
    # Downsample to a smaller square frame, [84, 84]
    env = ResizeObservation(env, shape=84)
    # Normalize pixel values from [0, 255] to [0, 1]
    env = TransformObservation(env, f=lambda x: x / 255.)
    # Squash 4 consecutive frames of the environment into a
    # single observation point to feed to our learning model, [4, 84, 84]
    env = FrameStack(env, num_stack=4)
    return env
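A smoke test for create_env (requires gym-super-mario-bros and nes-py; the expected shape follows the comments above):

import numpy as np

env = create_env()
obs = env.reset()
print(np.asarray(obs).shape)  # expected: (4, 84, 84)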
Example #7
import datetime

import gym
import matplotlib.pyplot as plt
import numpy as np
from gym.wrappers import FrameStack, GrayScaleObservation, Monitor, TransformObservation
from torch.optim import Adam

import autonomous_systems_project.callbacks as cb
from autonomous_systems_project.agents import DoubleDQNAgent, DQNAgent, SimpleDQN
from autonomous_systems_project.memory import RandomReplayMemory

env = gym.make("CartPole-v1")
env = TransformObservation(env, lambda obs: np.array(obs).astype(np.float32))

policy_net = SimpleDQN(4, env.action_space.n)
target_net = SimpleDQN(4, env.action_space.n)
target_net.load_state_dict(policy_net.state_dict())

gamma = 0.9
batch_size = 32
epsilon_steps = 1000
memory_size = 100000
episodes = 500
lr = 0.001
target_update = 5

parameters = {
    "env": env.unwrapped.spec.id,
    "gamma": gamma,
    "batch_size": batch_size,
Example #8
import datetime
from pathlib import Path

import gym_super_mario_bros
import torch
from gym.wrappers import FrameStack, GrayScaleObservation, TransformObservation
from nes_py.wrappers import JoypadSpace

from agent import Mario
from wrappers import ResizeObservation, SkipFrame

# Initialize Super Mario environment
env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')

# Limit the action-space to
#   0. walk right
#   1. jump right
env = JoypadSpace(env, [['right'], ['right', 'A']])

# Apply Wrappers to environment
env = SkipFrame(env, skip=4)
env = GrayScaleObservation(env, keep_dim=False)
env = ResizeObservation(env, shape=84)
env = TransformObservation(env, f=lambda x: x / 255.)
env = FrameStack(env, num_stack=4)

env.reset()

save_dir = Path('checkpoints') / datetime.datetime.now().strftime(
    '%Y-%m-%dT%H-%M-%S')
save_dir.mkdir(parents=True)

checkpoint = None  # Path('checkpoints/2020-10-21T18-25-27/mario.chkpt')

# Add in check to see if GPU is available (BM)
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Using GPU!")
else:
    device = torch.device("cpu")
    print("Using CPU.")