Esempio n. 1
0
    def get_frame(self, idx, history_length=1):
        """
        Fetch the entry stored at ``idx`` together with its preceding history.

        The result is a list of ``history_length`` items ordered oldest first,
        whose last item is ``self.state_buffer[idx]``. Walking backwards stops
        advancing at the first predecessor flagged in ``self.dones_buffer``;
        from there on the remaining history slots are filled with zeros shaped
        like the newest entry.

        Raises ``VelException`` when ``idx`` is not below ``self.current_size``
        or when the walk back reaches ``self.current_idx``.
        """
        if idx >= self.current_size:
            raise VelException("Requested frame beyond the size of the buffer")

        if history_length > 1:
            assert self.state_buffer.shape[-1] == 1, \
                "State buffer must have last dimension of 1 if we want frame history"

        newest = self.state_buffer[idx]

        # Entries are gathered newest-first and flipped just before returning
        gathered = [newest]
        cursor = idx

        for _ in range(history_length - 1):
            earlier = (cursor - 1) % self.buffer_capacity

            if earlier == self.current_idx:
                raise VelException(
                    "Cannot provide enough history for the frame")

            if self.dones_buffer[earlier]:
                # Predecessor is flagged as done - pad with zeros and keep the
                # cursor in place, so every older slot gets padded as well
                gathered.append(np.zeros_like(newest))
                continue

            cursor = earlier
            gathered.append(self.state_buffer[cursor])

        return gathered[::-1]
Esempio n. 2
0
    def get_frame(self, frame_idx, env_idx):
        """
        Build the stacked observation for ``(frame_idx, env_idx)``.

        ``self.frame_history`` entries are joined along the last axis, oldest
        first, ending with ``self.state_buffer[frame_idx, env_idx]``. Once a
        predecessor flagged in ``self.dones_buffer`` is met, that slot and all
        older ones are filled with zeros shaped like the newest entry.

        Raises ``VelException`` when ``frame_idx`` is not below
        ``self.current_size`` or when the walk back reaches ``self.current_idx``.
        """
        if frame_idx >= self.current_size:
            raise VelException("Requested frame beyond the size of the buffer")

        newest = self.state_buffer[frame_idx, env_idx]

        # Collected newest-first; reversed when concatenating
        stack = [newest]
        cursor = frame_idx

        for _ in range(self.frame_history - 1):
            earlier = (cursor - 1) % self.buffer_capacity

            if earlier == self.current_idx:
                raise VelException("Cannot provide enough history for the frame")

            if self.dones_buffer[earlier, env_idx]:
                # Predecessor is flagged as done - pad with zeros; the cursor
                # stays put so the remaining slots are padded too
                stack.append(np.zeros_like(newest))
                continue

            cursor = earlier
            stack.append(self.state_buffer[cursor, env_idx])

        stack.reverse()
        return np.concatenate(stack, axis=-1)
Esempio n. 3
0
    def sample_batch_rollout(self, rollout_length, history_length):
        """
        Draw a random buffer index to be used for a rollout.

        While the buffer is not yet full the index is drawn from
        ``[rollout_length - 1, current_size - 2]``. Once it is full, any index
        may be drawn except the ``history_length + rollout_length - 1``
        positions starting at ``self.current_idx`` (wrapping around).

        Raises ``VelException`` when the buffer holds too few elements.
        """
        if self.current_size < self.buffer_capacity:
            # Buffer still filling up - only the first ``current_size`` slots count
            if rollout_length + 1 > self.current_size:
                raise VelException(
                    "Not enough elements in the buffer to sample the rollout")

            # The final "- 1" keeps the most recent element from being drawn
            offset = np.random.choice(self.current_size - rollout_length)
            return offset + rollout_length - 1

        if rollout_length + history_length > self.current_size:
            raise VelException(
                "Not enough elements in the buffer to sample the rollout")

        draw = np.random.choice(self.buffer_capacity)

        # Positions whose history may already be partially overwritten
        blocked_span = history_length + rollout_length - 1
        blocked = np.arange(
            self.current_idx, self.current_idx + blocked_span
        ) % self.buffer_capacity

        # Rejection sampling: redraw until we land outside the blocked window
        while draw in blocked:
            draw = np.random.choice(self.buffer_capacity)

        return draw
Esempio n. 4
0
    def get_frame_with_future(self, frame_idx, history_length=1):
        """
        Return ``(past_frame, future_frame)`` for the given buffer position.

        ``past_frame`` is whatever ``self.get_frame(frame_idx, history_length)``
        returns. The successor is ``self.state_buffer`` at the following index
        (wrapping at ``self.buffer_capacity``), or - when ``frame_idx`` is
        flagged in ``self.dones_buffer`` - a dict with zeroed ``'environment'``
        and ``'goal'`` entries shaped like that successor's.

        With ``history_length > 1`` the future frame is ``past_frame`` without
        its first last-axis slot, concatenated with the successor.

        Raises ``VelException`` when ``frame_idx`` equals ``self.current_idx``.
        """
        if frame_idx == self.current_idx:
            raise VelException("Cannot provide enough future for the frame")

        past_frame = self.get_frame(frame_idx, history_length)
        wants_history = history_length > 1

        if wants_history:
            assert self.state_buffer.shape[-1] == 1, \
                "State buffer must have last dimension of 1 if we want frame history"

        if self.dones_buffer[frame_idx]:
            # Terminal step - the successor is replaced by an all-zero copy
            successor = self.state_buffer[(frame_idx + 1) % self.buffer_capacity]
            next_frame = {
                key: np.zeros_like(successor[key])
                for key in ('environment', 'goal')
            }
        else:
            next_frame = self.state_buffer[(frame_idx + 1) % self.buffer_capacity]

        if not wants_history:
            return past_frame, next_frame

        # NOTE(review): this path needs past_frame to expose ``take``/``shape``
        # and next_frame to be concatenable - confirm against get_frame and the
        # dict produced for terminal steps.
        shifted_past = past_frame.take(
            indices=np.arange(1, past_frame.shape[-1]), axis=-1)
        future_frame = np.concatenate([shifted_past, next_frame], axis=-1)

        return past_frame, future_frame
Esempio n. 5
0
 def resolve(self, parameters):
     """
     Look up this variable's value in ``parameters``.

     When ``self.default_value`` equals ``DUMMY_VALUE`` the name must be
     present in ``parameters``, otherwise ``VelException`` is raised. In
     every other case a missing name falls back to ``self.default_value``.
     """
     if self.default_value == DUMMY_VALUE:
         # No usable default - the parameter is mandatory
         if self.name not in parameters:
             raise VelException(f"Undefined parameter: {self.name}")

         return parameters[self.name]

     return parameters.get(self.name, self.default_value)
Esempio n. 6
0
 def resolve(self, _):
     """
     Look up this variable's value in the process environment.

     The argument is ignored. When ``self.default_value`` equals
     ``DUMMY_VALUE`` the variable must exist in ``os.environ``, otherwise
     ``VelException`` is raised. In every other case a missing variable
     falls back to ``self.default_value``.
     """
     if self.default_value == DUMMY_VALUE:
         # No usable default - the environment variable is mandatory
         if self.name not in os.environ:
             raise VelException(
                 f"Undefined environment variable: {self.name}")

         return os.environ[self.name]

     return os.environ.get(self.name, self.default_value)
Esempio n. 7
0
    def get_frame_with_future_forward_steps(self, frame_idx, env_idx, forward_steps, discount_factor):
        """
        Return an n-step transition starting at ``frame_idx`` for one environment.

        :param frame_idx: buffer position of the first step
        :param env_idx: index of the environment (second buffer axis)
        :param forward_steps: number of steps ``n`` to look ahead
        :param discount_factor: per-step discount applied to the rewards
        :return: tuple ``(past_frame, future_frame, reward, done)`` where
            ``past_frame`` comes from ``self.get_frame(frame_idx, env_idx)``,
            ``future_frame`` is the stacked observation ``forward_steps`` steps
            later (all zeros if any step in the window is flagged done),
            ``reward`` is the discounted reward sum up to and including the
            first done step, and ``done`` tells whether a done flag was seen.
        :raises VelException: if the window of steps touches ``self.current_idx``
        """
        index_array = np.arange(frame_idx, frame_idx + forward_steps) % self.current_size

        if self.current_idx in index_array:
            raise VelException("Cannot provide enough future for the frame")

        past_frame = self.get_frame(frame_idx, env_idx)
        dones_array = self.dones_buffer[index_array, env_idx]

        rewards_array = self.reward_buffer[index_array, env_idx]
        discounted_rewards_array = rewards_array * (discount_factor ** np.arange(forward_steps))

        if dones_array.any():
            # Are we done between current frame and frame + n
            done = True

            # Shift the flags by one so that the reward of the terminal step
            # itself is still counted and only the steps after it are dropped
            dones_shifted = np.zeros_like(dones_array)
            dones_shifted[1:] = dones_array[:-1]
            reward = discounted_rewards_array[~np.logical_or.accumulate(dones_shifted)].sum()
            future_frame = np.zeros_like(past_frame)
        else:
            done = False
            reward = discounted_rewards_array.sum()

            if forward_steps >= self.frame_history:
                # The future observation must END at frame_idx + forward_steps,
                # so take the trailing `frame_history` steps of the window.
                # Taking the leading ones would give the observation only
                # `frame_history` steps ahead when forward_steps > frame_history.
                tail_start = len(index_array) - self.frame_history
                frame_indices = (index_array[tail_start:] + 1) % self.buffer_capacity
                future_frame = np.moveaxis(self.state_buffer[frame_indices, env_idx], 0, -2).reshape(past_frame.shape)
            else:
                # Fewer new frames than the history length: keep the newest part
                # of the past stack and append all frames from the window
                frame_candidate = np.moveaxis(
                    self.state_buffer[(index_array + 1) % self.buffer_capacity, env_idx], 0, -2
                )
                frame_candidate_target_shape = (
                    list(frame_candidate.shape[:-2]) + [frame_candidate.shape[-2] * frame_candidate.shape[-1]]
                )

                future_frame = np.concatenate([
                    past_frame[..., (frame_candidate_target_shape[-1] - past_frame.shape[-1]):],
                    frame_candidate.reshape(frame_candidate_target_shape)
                ], -1)

        return past_frame, future_frame, reward, done
Esempio n. 8
0
    def get_frame_with_future(self, frame_idx, env_idx):
        """
        Return ``(past_frame, future_frame)`` for ``(frame_idx, env_idx)``.

        ``past_frame`` comes from ``self.get_frame(frame_idx, env_idx)``. If the
        position is flagged in ``self.dones_buffer`` the future frame is all
        zeros shaped like ``past_frame``. Otherwise it is the entry at the
        following index (wrapping at ``self.buffer_capacity``), preceded - when
        ``self.frame_history > 1`` - by ``past_frame`` without its first
        last-axis slot.

        Raises ``VelException`` when ``frame_idx`` equals ``self.current_idx``.
        """
        if frame_idx == self.current_idx:
            raise VelException("Cannot provide enough future for the frame")

        past_frame = self.get_frame(frame_idx, env_idx)

        if self.dones_buffer[frame_idx, env_idx]:
            # Terminal step - there is no real successor
            return past_frame, np.zeros_like(past_frame)

        successor = self.state_buffer[(frame_idx + 1) % self.buffer_capacity, env_idx]

        if self.frame_history > 1:
            # Slide the history window forward by one slot
            future_frame = np.concatenate([past_frame[..., 1:], successor], axis=-1)
        else:
            future_frame = successor

        return past_frame, future_frame
Esempio n. 9
0
 def result(self) -> dict:
     """
     Return the stored epoch result.

     Raises ``VelException`` while ``self._result`` is still ``None``.
     """
     frozen = self._result

     if frozen is None:
         raise VelException("Result has not been frozen yet")

     return frozen