Example #1
 def __init__(self, env, rank=0):
     Wrapper.__init__(self, env=env)
     self.env = env
     self.rank = rank
     self.rewards = []
     self.current_metadata = {}  # extra info that gets injected into each log entry
     self.summaries_dict = {'reward': 0, 'episode_length': 0}
Example #2
def test(env: gym.Wrapper, model: tf.keras.Model, log_dir: Path) -> None:
    """Test the DQN on Pong.

    Args:
        env: The Atari Pong environment
        model: The model to be tested
        log_dir: Path where to save the video
    """
    env = Monitor(
        env,
        log_dir,
        force=True,  # overwrite existing videos
        video_callable=lambda count: True,  # force save this episode
    )

    state = Deque[tf.Tensor](maxlen=STATE_FRAMES)
    state.append(preprocess(env.reset()))  # initial state

    print("Starting testing...")
    while True:
        if len(state) < STATE_FRAMES:
            initial = None
            action = env.action_space.sample()
        else:
            initial = tf.stack(state, axis=-1)
            action = choose(model, initial, 0)  # choose greedily

        state_new, _, done, _ = env.step(action)
        state_new = preprocess(state_new)
        state.append(state_new)

        if done:
            break
    print("Testing done")
Example #3
 def __init__(self, env, num_skills, meta_agent):
     Wrapper.__init__(self, env)
     self.num_skills = num_skills
     self.meta_agent = meta_agent
     # Each skill equally likely to be chosen
     self.prior_probability_of_skill = 1.0 / self.num_skills
     self._max_episode_steps = self.env._max_episode_steps
Example #4
    def __init__(self, env, HIRO_agent, max_sub_policy_timesteps):
        Wrapper.__init__(self, env)
        self.env = env
        self.meta_agent = HIRO_agent
        self.max_sub_policy_timesteps = max_sub_policy_timesteps

        self.track_intrinsic_rewards = []
Example #5
def run_agent(player: MarioPlayer, env: Wrapper, record: bool, vids_path: str,
              index):
    if record:
        rec_output_path = os.path.join(vids_path,
                                       "{name}.mp4".format(name=index))
        rec = monitor.video_recorder.VideoRecorder(env, path=rec_output_path)

    state = env.reset()
    done = False

    for step in range(steps_limit):
        if done:
            break
        action = player.act(state)
        state, reward, done, info = env.step(action)
        env.render()
        if record:
            rec.capture_frame()
        player.update_info(info)
        player.update_reward(reward)
        if info['flag_get']:  # reaching the flag ends the run
            done = True

    if record:
        rec.close()
    player.calculate_fitness()
    outcome = player.get_run_info()
    outcome['index'] = index
    return outcome
Example #6
 def __init__(self, env, memory, lock, args):
     GymWrapper.__init__(self, env)
     self.memory = memory
     self.lock = lock  # Lock for memory access
     self.skipframes = args['skip']
     self.environment_name = args['environment_name']
     self.logdir = args['logdir']
     self.current_i = 0
Example #7
 def __init__(self, env, stack_size=4):
     Wrapper.__init__(self, env=env)
     self.stack_size = stack_size
     self.observation_space = spaces.Box(low=0,
                                         high=255,
                                         shape=(84, 84, self.stack_size),
                                         dtype=np.uint8)
     self.state = None
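
A tiny standalone check of the stacked-frame observation space declared above (spaces.Box and its contains() method are standard gym API; the shape values are copied from the example):

import numpy as np
from gym import spaces

stack_size = 4
observation_space = spaces.Box(low=0, high=255,
                               shape=(84, 84, stack_size),
                               dtype=np.uint8)
print(observation_space.shape)                                            # (84, 84, 4)
print(observation_space.contains(np.zeros((84, 84, 4), dtype=np.uint8)))  # True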
Example #8
 def __init__(self, env, num_states, num_skills, regularisation_weight, visitations_decay):
     Wrapper.__init__(self, env)
     self.num_skills = num_skills
     self.num_states = num_states
     self.state_visitations = [
         [0 for _ in range(num_states)] for _ in range(num_skills)]
     self.regularisation_weight = regularisation_weight
     self.visitations_decay = visitations_decay
Example #9
 def __init__(self, env, logdir, info_keywords=()):
     """
     A monitor wrapper for Gym environments; it is used to record the episode reward, length, time and other data.
     :param env: (Gym environment) The environment
     :param logdir: (str) the location to save tensorboard logs
     :param info_keywords: (tuple) extra information to log, from the information return of environment.step
     """
     Wrapper.__init__(self, env=env)
     self.writer = FileWriter(logdir)
     self.info_keywords = info_keywords
     self.episode_info = dict()
     self.total_steps = 0
Example #10
    def __init__(
        self,
        env: gym.Env,
        callback: Callback,
    ):
        """Initialize.

        :param env: Gym environment to wrap
        :param callback: a callback object
        """
        Wrapper.__init__(self, env)
        self._callback = callback
Example #11
    def __init__(self,
                 env,
                 n_skills,
                 total_timesteps=None,
                 batch_size=64,
                 hidden_dim=128,
                 lr=1e-3):
        """
        Args:
            env (gym env)
            n_skills (int) : number of skills
            total_timesteps (int) : same parameter as algorithm.
                If not None, DIAYN is in "training" mode: a progress bar will
                appear during training. If None, there will be no progress bar.
            hidden_dim (int) : dimension of latent space
            lr (float) : learning rate
        """
        Wrapper.__init__(self, env)
        self.n_skills = n_skills
        self.hidden_dim = hidden_dim
        self.state_size = env.observation_space.shape[0]
        self.lr = lr
        self.batch_size = batch_size
        self.probability_of_skill = 1 / self.n_skills

        self.discriminator = nn.Sequential(
            nn.Linear(self.state_size, self.hidden_dim), nn.ReLU(),
            nn.Linear(self.hidden_dim, self.hidden_dim), nn.ReLU(),
            nn.Linear(self.hidden_dim, self.n_skills))

        self.discriminator_optimizer = optim.Adam(
            self.discriminator.parameters(), lr=self.lr)
        self.discriminator_optimizer.zero_grad()

        # Set up the environment
        self.env.observation_space.shape = (self.state_size + self.n_skills, )

        # Init skill and "loggers"
        self.skill = np.random.randint(self.n_skills)

        self.training_mode = total_timesteps is not None
        self.pbar = tqdm(total=total_timesteps, disable=not self.training_mode)
        self.pbar.set_postfix_str("Ready to train !")
        self.current_experiment_number = [0]
        self.discriminator_losses = []

        self.current_step = 0

        if self.training_mode:
            self.buffer = Buffer(total_timesteps,
                                 self.state_size + 1)  # state + current skill
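
For reference, a standalone sketch of the skill discriminator this wrapper builds, with placeholder sizes (in the example, state_size comes from the wrapped environment's observation space):

import torch
from torch import nn, optim

state_size, hidden_dim, n_skills = 8, 128, 10   # placeholder sizes

discriminator = nn.Sequential(
    nn.Linear(state_size, hidden_dim), nn.ReLU(),
    nn.Linear(hidden_dim, hidden_dim), nn.ReLU(),
    nn.Linear(hidden_dim, n_skills))
optimizer = optim.Adam(discriminator.parameters(), lr=1e-3)

# Dummy forward pass: a batch of states -> one logit per skill.
logits = discriminator(torch.zeros(64, state_size))
print(logits.shape)   # torch.Size([64, 10])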
Example #12
    def fix_visual_game(env: gym.Wrapper) -> gym.Wrapper:
        w, h, channels = env.observation_space.shape
        setattr(env, 'env_name', env.unwrapped.spec.id)
        setattr(env, 'state_num', (channels, w, h))
        setattr(env, 'state_dim', 3)
        setattr(env, 'action_dim', env.action_space.n)
        setattr(env, 'if_discrete',
                isinstance(env.action_space, gym.spaces.Discrete))
        target_reward = getattr(env, 'target_reward', None)
        target_reward_default = getattr(env.spec, 'reward_threshold', None)
        if target_reward is None:
            target_reward = target_reward_default
        if target_reward is None:
            target_reward = 2**16
        setattr(env, 'target_reward', target_reward)

        def convert_image_shape(img: np.ndarray) -> np.ndarray:
            (w, h, channels) = img.shape
            return img.reshape((channels, w, h))

        def fix_step(env_step):
            def step(action):
                observation, reward, terminal, info = env_step(action)
                print(type(observation))
                return convert_image_shape(observation), reward, terminal, info

            return step

        env.step = fix_step(env.step)
        return env
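
One caveat worth noting about convert_image_shape: reshape only reinterprets the flat buffer, so if a true HWC-to-CHW conversion is intended, np.transpose is the usual tool. A minimal standalone comparison (array contents are arbitrary):

import numpy as np

img = np.arange(2 * 3 * 4).reshape((2, 3, 4))    # pretend (w, h, channels)

reshaped = img.reshape((4, 2, 3))                # same buffer order, pixel values scrambled
transposed = np.transpose(img, (2, 0, 1))        # channel axis genuinely moved to the front

print(reshaped.shape, transposed.shape)          # (4, 2, 3) (4, 2, 3)
print(np.array_equal(reshaped, transposed))      # False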
Example #13
    def __init__(self, length, markovian=True, can_stay=False):
        layout = np.zeros(shape=(3, length + 1), dtype=int)  # plain int; np.int was removed from NumPy

        layout[:, 0] = 1
        layout[1, :] = 1
        layout[:, -1] = 1

        entries = [(0, 0), (2, 0)]
        exits = [(0, length)]
        traps = [(2, length)]

        MazeClass = MarkovianMaze if markovian else NonMarkovianMaze

        maze = MazeClass(layout, entries, exits, traps, can_stay, -1.0, -1.0,
                         length + 1, -1)

        Wrapper.__init__(self, maze)
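
To visualize the corridor layout built above, here is a tiny standalone sketch with length = 5 (the meaning of the 1-cells is inferred from the entries, exits and traps coordinates):

import numpy as np

length = 5
layout = np.zeros(shape=(3, length + 1), dtype=int)
layout[:, 0] = 1    # first column holds the two entry cells
layout[1, :] = 1    # middle row is the corridor
layout[:, -1] = 1   # last column holds the exit and the trap
print(layout)
# [[1 0 0 0 0 1]
#  [1 1 1 1 1 1]
#  [1 0 0 0 0 1]]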
Example #14
 def __init__(self, env, start_obss, end_obss):
     """
     Creates an environment tailored to train a single (missing) skill. Trajectories are initialized in a start_obss
     state and terminated (and reward is generated) upon reaching an end_obss state.
     :param env: AsaEnv environment to wrap. Environment is cloned to sustain integrity of original env.
     :param start_obss: Tensor of experienced starting observations (where skill should initiate)
     :param end_obss: Tensor of experienced ending observations (where skill should terminate)
     """
     Serializable.quick_init(self, locals())
     Wrapper.__init__(self, AsaEnv.clone_wrapped(
         env))  # this clones base env along with all wrappers
     if start_obss.shape != end_obss.shape:
         raise ValueError(
             'start_obss ({}) and end_obss ({}) must be of same shape'.
             format(start_obss.shape, end_obss.shape))
     self._end_obss = end_obss.reshape((end_obss.shape[0], -1))
     self._start_obss = start_obss.reshape((start_obss.shape[0], -1))
     self.current_obs_idx = None
Example #15
 def __init__(self, env, k, axis=None):
     """Stack k last observations.
        If axis == None, create a new 0 dimension and concatenate along it
        Otherwise, concatenate observations along the given axis.
     """
     Wrapper.__init__(self, env)
     self.k = k
     self.axis = axis
     self.obs = deque([], maxlen=k)
     shp = list(env.observation_space.shape)
     dim = len(shp)
     if axis is not None:  # treat axis=0 as a real axis, not as "no axis"
         assert axis < dim, "Axis {} is out of bounds for observations of dimension {}".format(axis, dim)
         self.stack = lambda x: np.concatenate(list(x), axis=axis)
         shp[axis] *= k
         self.observation_space = spaces.Box(low=0, high=255, shape=tuple(shp))
     else:
         self.stack = lambda x: np.stack(list(x), axis=0)
         self.observation_space = spaces.Box(low=0, high=255, shape=(k,) + tuple(shp))
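
A quick standalone illustration of the two stacking modes used above (the frame shape is a placeholder):

import numpy as np

k = 4
frames = [np.zeros((84, 84), dtype=np.uint8) for _ in range(k)]

# axis is None: stack along a new leading dimension
print(np.stack(frames, axis=0).shape)        # (4, 84, 84)

# axis = 1: concatenate along an existing axis
print(np.concatenate(frames, axis=1).shape)  # (84, 336)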
Example #16
def _test(id: int, env: gym.Wrapper, model: TD3Network, render: bool = False, recording_path=None,
          save_video=False):
    episode_rewards = []
    action_repeats = []

    state = env.reset()
    done = False
    episode_images = []

    while not done:
        # get action
        state = torch.FloatTensor(state).unsqueeze(0)
        action = model.actor(state)
        repeat_q = model.critic_1(state, action)
        repeat_idx = repeat_q.argmax(1).item()

        action = action.data.cpu().numpy()[0]
        repeat = model.action_repeats[repeat_idx]
        action_repeats.append(repeat)

        for _ in range(repeat):
            if render:
                if save_video:
                    img = env.render(mode='rgb_array')
                    episode_images.append(img)
                else:
                    env.render(mode='human')

            # step
            state, reward, done, info = env.step(action)
            episode_rewards.append(reward)
            if done:
                break

    if render and save_video:
        write_gif(episode_images, action_repeats, episode_rewards,
                  os.path.join(recording_path, 'ep_{}.gif'.format(id)))

    return sum(episode_rewards), action_repeats
Example #17
    def __init__(self,
                 env,
                 num_stack,
                 use_lazy_frame=False,
                 lz4_compress=False):
        Wrapper.__init__(self, env)
        BaseWrapper.__init__(self)

        self.num_stack = num_stack
        self.lz4_compress = lz4_compress
        self.use_lazy_frame = use_lazy_frame

        self.frames = deque(maxlen=num_stack)

        low = np.repeat(self.observation_space.low[np.newaxis, ...],
                        num_stack,
                        axis=0)
        high = np.repeat(self.observation_space.high[np.newaxis, ...],
                         num_stack,
                         axis=0)
        self.observation_space = Box(low=low,
                                     high=high,
                                     dtype=self.observation_space.dtype)
Example #18
 def __init__(self, env):
     Wrapper.__init__(self, env)
     self.game_over = False
Example #19
 def __init__(self, env):
     Wrapper.__init__(self, env)
     MultiAgentEnv.__init__(self, getattr_unwrapped(env, 'num_agents'))
Example #20
 def __init__(self, env):
     Wrapper.__init__(self, env)
     self.action_space = gym.spaces.Tuple((self.action_space, ))
     self.observation_space = gym.spaces.Tuple((self.observation_space, ))
     MultiAgentEnv.__init__(self, num_agents=1)
Example #21
 def __init__(self, env, HIRO_agent, max_sub_policy_timesteps):
     Wrapper.__init__(self, env)
     self.env = env
     self.meta_agent = HIRO_agent
     self.max_sub_policy_timesteps = max_sub_policy_timesteps
Example #22
 def __init__(self, env, HIRO_agent):
     Wrapper.__init__(self, env)
     self.env = env
     self.HIRO_agent = HIRO_agent
     self.action_space = self.observation_space
Example #23
 def __init__(self, env, lower_level_agent,
              timesteps_to_give_up_control_for, num_skills):
     Wrapper.__init__(self, env)
     self.action_space = spaces.Discrete(num_skills)
     self.lower_level_agent = lower_level_agent
     self.timesteps_to_give_up_control_for = timesteps_to_give_up_control_for
Example #24
 def __init__(self, env, warm_up_examples = 0):
   Wrapper.__init__(self, env)
   self.warm_up_examples = warm_up_examples
   self.warm_up_action = 0
   self.observation_space = Box(low=0, high=255, shape=(210, 160, 3), dtype=np.uint8)
Example #25
 def __init__(self, env):
     Wrapper.__init__(self, env)
     self.frame_stack = deque(maxlen=4)
Example #26
    def __init__(self,
                 env: Union[EnvDataset, PolicyEnv] = None,
                 dataset: Union[EnvDataset, PolicyEnv] = None,
                 batch_size: int = None,
                 num_workers: int = None,
                 **kwargs):
        assert not (
            env is None and dataset is None
        ), "One of the `dataset` or `env` arguments must be passed."
        assert not (
            env is not None and dataset is not None
        ), "Only one of the `dataset` and `env` arguments can be used."

        if not isinstance(env, IterableDataset):
            raise RuntimeError(
                f"The env {env} isn't an interable dataset! (You can use the "
                f"EnvDataset or PolicyEnv wrappers to make an IterableDataset "
                f"from a gym environment.")

        if isinstance(env.unwrapped, VectorEnv):
            if batch_size is not None and batch_size != env.num_envs:
                logger.warning(
                    UserWarning(
                        f"The provided batch size {batch_size} will be ignored, since "
                        f"the provided env is vectorized with a batch_size of "
                        f"{env.unwrapped.num_envs}."))
            batch_size = env.num_envs

        if isinstance(env.unwrapped, BatchedVectorEnv):
            num_workers = env.n_workers
        elif isinstance(env.unwrapped, AsyncVectorEnv):
            num_workers = env.num_envs
        else:
            num_workers = 0

        self.env = env
        # TODO: We could also perhaps let those parameters through to the
        # constructor of DataLoader, because in __iter__ we're not using the
        # DataLoader iterator anyway! This would have the benefit that the
        # batch_size and num_workers attributes would reflect the actual state
        # of the iterator, and things like pytorch-lightning would stop warning
        # us that the num_workers is too low.
        super().__init__(
            dataset=self.env,
            # The batch size is None, because the VecEnv takes care of
            # doing the batching for us.
            batch_size=batch_size,
            num_workers=num_workers,
            # collate_fn=None,
            **kwargs,
        )
        Wrapper.__init__(self, env=self.env)
        assert not isinstance(
            self.env, GymDataLoader), "Something very wrong is happening."

        # self.max_epochs: int = max_epochs
        self.observation_space: gym.Space = self.env.observation_space
        self.action_space: gym.Space = self.env.action_space
        self.reward_space: gym.Space
        if isinstance(env.unwrapped, VectorEnv):
            env: VectorEnv
            batch_size = env.num_envs
            # TODO: Overwriting the action space to be the 'batched' version of
            # the single action space, rather than a Tuple(Discrete, ...) as is
            # done in the gym.vector.VectorEnv.
            self.action_space = batch_space(env.single_action_space,
                                            batch_size)

        if not hasattr(self.env, "reward_space"):
            self.reward_space = spaces.Box(
                low=self.env.reward_range[0],
                high=self.env.reward_range[1],
                shape=(),
            )
            if isinstance(self.env.unwrapped, VectorEnv):
                # Same here, we use a 'batched' space rather than Tuple.
                self.reward_space = batch_space(self.reward_space, batch_size)
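
The batch_space helper used above comes from gym.vector.utils; a minimal standalone sketch of its effect on a Discrete action space (the sizes are placeholders):

from gym import spaces
from gym.vector.utils import batch_space

single_action_space = spaces.Discrete(6)        # e.g. a small Atari action set
batched = batch_space(single_action_space, n=8)
print(batched)                                  # typically MultiDiscrete([6 6 6 6 6 6 6 6])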
Example #27
 def __init__(self, env):
     Wrapper.__init__(self, env)
     if len(env.unwrapped.get_action_meanings()) < 3:
         raise ValueError('Expected an action space of at least 3!')
Example #28
 def __init__(self, env, num_skills, timesteps_before_changing_skill,
              skills_agent):
     Wrapper.__init__(self, env)
     self.action_space = spaces.Discrete(num_skills)
     self.timesteps_before_changing_skill = timesteps_before_changing_skill
     self.skills_agent = skills_agent
Example #29
 def __init__(self, env, rank=0):
     Wrapper.__init__(self, env=env)
     self.rank = rank
     self.rewards = []
     self.current_metadata = {}
     self.info = {'reward': 0, 'episode_length': 0}
Example #30
 def __init__(self, env, max_n_noops):
     Wrapper.__init__(self, env)
     self.max_n_noops = max_n_noops