def env_creator(env_cls, env_config):
    env = env_cls(**env_config)
    if isinstance(env.observation_space, (Box, FloatBox)):
        # Rescale continuous observations to [-1, 1]
        obs_min = env.observation_space.low
        obs_max = env.observation_space.high
        env = TransformObservation(
            env, lambda obs: 2 * (obs - obs_min) / (obs_max - obs_min) - 1)
    elif isinstance(env.observation_space, IntBox):
        # Rescale integer observations to [0, 1]
        obs_min = env.observation_space.low
        obs_max = env.observation_space.high
        env = TransformObservation(
            env, lambda obs: (obs - obs_min) / (obs_max - obs_min))
    return env
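# Hypothetical usage sketch for env_creator above. It assumes gym is installed
# and that Box (gym.spaces), FloatBox/IntBox (rlpyt-style spaces) and
# TransformObservation are imported as in the snippet; "Pendulum-v1" is only an
# illustrative id with finite observation bounds.
import gym

normalized_env = env_creator(gym.make, {"id": "Pendulum-v1"})
obs = normalized_env.reset()  # continuous observations now lie in [-1, 1]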
def test_transform_observation(env_id):
    # Wrap a second copy of the environment so observations pass through an
    # affine transform, then check that rewards and done flags are untouched.
    affine_transform = lambda x: 3 * x + 2
    env = gym.make(env_id)
    wrapped_env = TransformObservation(gym.make(env_id),
                                       lambda obs: affine_transform(obs))

    obs = env.reset(seed=0)
    wrapped_obs = wrapped_env.reset(seed=0)
    assert np.allclose(wrapped_obs, affine_transform(obs))

    action = env.action_space.sample()
    obs, reward, done, _ = env.step(action)
    wrapped_obs, wrapped_reward, wrapped_done, _ = wrapped_env.step(action)
    assert np.allclose(wrapped_obs, affine_transform(obs))
    assert np.allclose(wrapped_reward, reward)
    assert wrapped_done == done
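# Illustrative direct invocation of the test above (assumes gym and numpy as np
# are imported, and a gym version whose reset() accepts seed= while step() still
# returns 4 values); in a test suite this would normally be driven by pytest
# parametrization over env_id.
test_transform_observation("CartPole-v1")
test_transform_observation("Pendulum-v1")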
def make_atari_env(env_name):
    env = gym.make(env_name)
    # Press FIRE on reset for games that require it
    env = FireReset(env)
    # Standard Atari preprocessing: no-op starts, frame skipping,
    # 84x84 grayscale frames, no built-in observation scaling
    env = AtariPreprocessing(env,
                             noop_max=30,
                             frame_skip=2,
                             screen_size=84,
                             terminal_on_life_loss=False,
                             grayscale_obs=True,
                             scale_obs=False)
    # Put images into channels-first layout for PyTorch
    env = PyTorchImageWrapper(env)
    # Stack the last 4 frames into a single observation
    env = FrameStack(env, num_stack=4)
    # Materialize the LazyFrames into a numpy array
    env = TransformObservation(env, f=np.array)
    # Concatenate the stacked frames along the channel axis
    env = ConcatWrapper(env, axis=0)
    # Scale pixel values to [0, 1] as float32
    env = TransformObservation(
        env, f=lambda obs: np.asarray(obs, dtype=np.float32) / 255.0)
    return env
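# Hedged usage sketch for make_atari_env. It assumes the Atari ROMs are
# installed and that FireReset, PyTorchImageWrapper and ConcatWrapper are the
# project's own wrappers imported elsewhere; the env id is only an example.
# With frame_skip=2 in AtariPreprocessing, a NoFrameskip variant should be used
# so frames are not skipped twice.
env = make_atari_env("PongNoFrameskip-v4")
obs = env.reset()
print(obs.shape, obs.dtype)  # likely (4, 84, 84) float32, depending on the custom wrappers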
def get_env(*args, **kwargs):
    # Load a bsuite experiment (logging results to CSV) unless a gym id is
    # given, stack the last 4 frames, and flatten the stack into a 1-D vector.
    return GymEnvWrapper(
        TransformObservation(
            env=FrameStack(
                num_stack=4,
                env=(gym_wrapper.GymFromDMEnv(
                    bsuite.load_and_record_to_csv(
                        bsuite_id=bsuite_id,
                        results_dir=results_dir,
                        overwrite=True,
                    )) if not gym_id else gym.make(gym_id))),
            f=lambda lazy_frames: np.reshape(
                np.stack(lazy_frames._frames), -1)))
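# Hypothetical setup for the module-level names get_env closes over; the bsuite
# id and results directory are placeholders, and gym_id can instead be set to a
# gym environment id string to bypass bsuite entirely.
bsuite_id = "catch/0"
results_dir = "/tmp/bsuite_results"
gym_id = None

env = get_env()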
def make_env(seed, max_episode_steps=1500):
    env = WestWorld(
        seed=seed,
        room_size=2,
        obs_width=64,
        obs_height=64,
        max_episode_steps=max_episode_steps,
        decore_option=DecoreOption.PORTRAIT,
        num_chars_on_wall=1,
    )
    # Scale the 64x64 RGB observation to float32 values in [0, 1]
    env = TransformObservation(
        env, f=lambda obs: np.asarray(obs, dtype=np.float32) / 255.0)
    # Put images into channels-first layout for PyTorch
    env = PyTorchImageWrapper(env)
    return env
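# Hypothetical usage of make_env, assuming WestWorld, DecoreOption and
# PyTorchImageWrapper come from the surrounding project.
env = make_env(seed=0)
obs = env.reset()  # 64x64 image scaled to [0, 1], presumably channels-first for PyTorch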
def create_env(env_name="SuperMarioBros-1-1-v0"):
    env = gym_super_mario_bros.make(env_name)
    # Restricts action space to only "right" and "jump + right"
    env = JoypadSpace(env, [["right"], ["right", "A"]])
    # Accumulates rewards every 4th frame
    env = SkipFrame(env, skip=4)
    # Transform RGB image to grayscale, [240, 256]
    env = GrayScaleObservation(env)
    # Downsample to new size, [84, 84]
    env = ResizeObservation(env, shape=84)
    # Scale pixel values to [0, 1]
    env = TransformObservation(env, f=lambda x: x / 255.)
    # Squash 4 consecutive frames of the environment into a
    # single observation point to feed to our learning model, [4, 84, 84]
    env = FrameStack(env, num_stack=4)
    return env
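# Short usage sketch for create_env: run one episode with random actions
# (assumes gym_super_mario_bros, nes_py's JoypadSpace and the custom SkipFrame
# wrapper are importable as in the snippet above).
env = create_env()
state = env.reset()
done = False
while not done:
    action = env.action_space.sample()          # random policy, for illustration only
    state, reward, done, info = env.step(action)
env.close()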
import datetime

import gym
import matplotlib.pyplot as plt
import numpy as np
from gym.wrappers import FrameStack, GrayScaleObservation, Monitor, TransformObservation
from torch.optim import Adam

import autonomous_systems_project.callbacks as cb
from autonomous_systems_project.agents import DoubleDQNAgent, DQNAgent, SimpleDQN
from autonomous_systems_project.memory import RandomReplayMemory

env = gym.make("CartPole-v1")
env = TransformObservation(env, lambda obs: np.array(obs).astype(np.float32))

policy_net = SimpleDQN(4, env.action_space.n)
target_net = SimpleDQN(4, env.action_space.n)
target_net.load_state_dict(policy_net.state_dict())

gamma = 0.9
batch_size = 32
epsilon_steps = 1000
memory_size = 100000
episodes = 500
lr = 0.001
target_update = 5

parameters = {
    "env": env.unwrapped.spec.id,
    "gamma": gamma,
    "batch_size": batch_size,
from agent import Mario
from wrappers import ResizeObservation, SkipFrame

# Initialize Super Mario environment
env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')

# Limit the action-space to
#   0. walk right
#   1. jump right
env = JoypadSpace(env, [['right'], ['right', 'A']])

# Apply Wrappers to environment
env = SkipFrame(env, skip=4)
env = GrayScaleObservation(env, keep_dim=False)
env = ResizeObservation(env, shape=84)
env = TransformObservation(env, f=lambda x: x / 255.)
env = FrameStack(env, num_stack=4)

env.reset()

save_dir = Path('checkpoints') / datetime.datetime.now().strftime(
    '%Y-%m-%dT%H-%M-%S')
save_dir.mkdir(parents=True)

checkpoint = None  # Path('checkpoints/2020-10-21T18-25-27/mario.chkpt')

# Add in check to see if GPU is available (BM)
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    print("Using GPU!")
else: