def get_frame(self, idx, history_length=1):
    """
    Return the frame stored at ``idx`` together with its preceding history.

    :param idx: position of the newest frame to fetch from ``self.state_buffer``
    :param history_length: total number of frames to return (newest included)
    :return: list of ``history_length`` frames ordered oldest first; the frames
        are NOT concatenated
    :raises VelException: if ``idx`` is not below ``self.current_size`` or if
        walking back would reach ``self.current_idx``
    """
    if idx >= self.current_size:
        raise VelException("Requested frame beyond the size of the buffer")

    if history_length > 1:
        assert self.state_buffer.shape[-1] == 1, \
            "State buffer must have last dimension of 1 if we want frame history"

    newest = self.state_buffer[idx]

    # Frames are collected newest-first and flipped right before returning
    frames = [newest]
    cursor = idx

    for _ in range(history_length - 1):
        candidate = (cursor - 1) % self.buffer_capacity

        if candidate == self.current_idx:
            raise VelException("Cannot provide enough history for the frame")

        if self.dones_buffer[candidate]:
            # The preceding slot is flagged as done - pad with zeroes.
            # The cursor does not move, so every remaining slot is padded too.
            frames.append(np.zeros_like(newest))
            continue

        cursor = candidate
        frames.append(self.state_buffer[cursor])

    frames.reverse()
    return frames
def get_frame(self, frame_idx, env_idx):
    """
    Return the frame at ``frame_idx`` for environment ``env_idx`` with its history.

    ``self.frame_history`` frames are gathered by walking backwards through the
    buffer and are joined along the last axis, oldest first.

    :param frame_idx: position of the newest frame in ``self.state_buffer``
    :param env_idx: second index into ``self.state_buffer`` / ``self.dones_buffer``
    :return: result of ``np.concatenate`` over the gathered frames (axis ``-1``)
    :raises VelException: if ``frame_idx`` is not below ``self.current_size`` or
        if walking back would reach ``self.current_idx``
    """
    if frame_idx >= self.current_size:
        raise VelException("Requested frame beyond the size of the buffer")

    newest = self.state_buffer[frame_idx, env_idx]

    # Frames are collected newest-first and flipped right before concatenation
    frames = [newest]
    cursor = frame_idx

    for _ in range(self.frame_history - 1):
        candidate = (cursor - 1) % self.buffer_capacity

        if candidate == self.current_idx:
            raise VelException("Cannot provide enough history for the frame")

        if self.dones_buffer[candidate, env_idx]:
            # The preceding slot is flagged as done - pad with zeroes.
            # The cursor does not move, so every remaining slot is padded too.
            frames.append(np.zeros_like(newest))
            continue

        cursor = candidate
        frames.append(self.state_buffer[cursor, env_idx])

    return np.concatenate(list(reversed(frames)), axis=-1)
def sample_batch_rollout(self, rollout_length, history_length):
    """
    Draw a random buffer index suitable as the end of a rollout.

    :param rollout_length: number of consecutive frames in the rollout
    :param history_length: number of history frames needed per frame; only
        taken into account once the buffer is full
    :return: the sampled index
    :raises VelException: if the buffer holds too few elements for the request
    """
    capacity = self.buffer_capacity
    buffer_is_full = not (self.current_size < capacity)

    if not buffer_is_full:
        # Buffer still filling up: sample only among what has been written
        if rollout_length + 1 > self.current_size:
            raise VelException("Not enough elements in the buffer to sample the rollout")

        # -1 because we cannot take the last one
        offset = np.random.choice(self.current_size - rollout_length)
        return offset + rollout_length - 1

    if rollout_length + history_length > self.current_size:
        raise VelException("Not enough elements in the buffer to sample the rollout")

    # Indices starting at current_idx that must not be drawn: part of their
    # history/rollout window may already have been overwritten
    window_start = self.current_idx
    window_stop = window_start + history_length + rollout_length - 1
    forbidden_ones = np.arange(window_start, window_stop) % capacity

    # Rejection sampling: redraw until the index is outside the forbidden window
    while True:
        candidate = np.random.choice(capacity)
        if candidate not in forbidden_ones:
            return candidate
def get_frame_with_future(self, frame_idx, history_length=1):
    """
    Return the frame at ``frame_idx`` together with the frame that follows it.

    :param frame_idx: position of the frame in the buffer
    :param history_length: forwarded to ``self.get_frame``; when greater than 1
        the future frame is built by dropping the first element along the last
        axis of the past frame and appending the next frame
    :return: tuple ``(past_frame, future_frame)``
    :raises VelException: if ``frame_idx`` equals ``self.current_idx``
    """
    if frame_idx == self.current_idx:
        raise VelException("Cannot provide enough future for the frame")

    past_frame = self.get_frame(frame_idx, history_length)
    wants_history = history_length > 1

    if wants_history:
        assert self.state_buffer.shape[-1] == 1, \
            "State buffer must have last dimension of 1 if we want frame history"

    # The successor slot is needed in both cases: as data or as a zero template
    next_idx = (frame_idx + 1) % self.buffer_capacity
    successor = self.state_buffer[next_idx]

    if self.dones_buffer[frame_idx]:
        # Frame is flagged as done - the "next" frame is an all-zero stand-in
        # with the same 'environment' / 'goal' layout as the successor slot
        next_frame = {
            'environment': np.zeros_like(successor['environment']),
            'goal': np.zeros_like(successor['goal'])
        }
    else:
        next_frame = successor

    if not wants_history:
        return past_frame, next_frame

    # NOTE(review): in the done case next_frame is a dict, which cannot be
    # concatenated with array history - confirm history_length > 1 is unused
    # with dict-valued states.
    shifted_history = past_frame.take(indices=np.arange(1, past_frame.shape[-1]), axis=-1)
    future_frame = np.concatenate([shifted_history, next_frame], axis=-1)

    return past_frame, future_frame
def resolve(self, parameters):
    """
    Resolve this variable against the supplied parameters.

    :param parameters: mapping that is looked up by ``self.name``
    :return: the value stored under ``self.name``; when it is missing and a
        default is set (``self.default_value`` differs from ``DUMMY_VALUE``),
        the default
    :raises VelException: if the name is missing and no default is set
    """
    key = self.name

    if self.default_value == DUMMY_VALUE:
        # No default configured - the parameter is mandatory
        if key not in parameters:
            raise VelException(f"Undefined parameter: {self.name}")
        return parameters[key]

    return parameters.get(key, self.default_value)
def resolve(self, _):
    """
    Resolve this variable from the process environment.

    The positional argument is ignored.

    :return: ``os.environ[self.name]``; when it is missing and a default is set
        (``self.default_value`` differs from ``DUMMY_VALUE``), the default
    :raises VelException: if the variable is missing and no default is set
    """
    key = self.name

    if self.default_value == DUMMY_VALUE:
        # No default configured - the environment variable is mandatory
        if key not in os.environ:
            raise VelException(f"Undefined environment variable: {self.name}")
        return os.environ[key]

    return os.environ.get(key, self.default_value)
def get_frame_with_future_forward_steps(self, frame_idx, env_idx, forward_steps, discount_factor):
    """
    Return a frame, the frame ``forward_steps`` later and the discounted reward between them.

    :param frame_idx: position of the starting frame in the buffer
    :param env_idx: second index into the state / reward / dones buffers
    :param forward_steps: number of transitions to look ahead
    :param discount_factor: per-step discount applied to the rewards
    :return: tuple ``(past_frame, future_frame, reward, done)``.
        ``done`` is True when any of the ``forward_steps`` transitions is
        flagged in ``self.dones_buffer``; ``future_frame`` is then all zeroes
        and ``reward`` only includes rewards up to and including the first
        flagged transition.
    :raises VelException: if the look-ahead window contains ``self.current_idx``
    """
    # Indices of the transitions t, t+1, ..., t+forward_steps-1
    index_array = np.arange(frame_idx, frame_idx + forward_steps) % self.current_size

    if self.current_idx in index_array:
        raise VelException("Cannot provide enough future for the frame")

    past_frame = self.get_frame(frame_idx, env_idx)
    dones_array = self.dones_buffer[index_array, env_idx]

    rewards_array = self.reward_buffer[index_array, env_idx]
    discounted_rewards_array = rewards_array * (discount_factor ** np.arange(forward_steps))

    if dones_array.any():
        # A transition inside the window is flagged as done
        done = True

        # Shift by one so that the reward of the flagged transition itself is
        # still counted; everything after it is masked out
        dones_shifted = np.zeros_like(dones_array)
        dones_shifted[1:] = dones_array[:-1]
        reward = discounted_rewards_array[~np.logical_or.accumulate(dones_shifted)].sum()

        future_frame = np.zeros_like(past_frame)
        return past_frame, future_frame, reward, done

    done = False
    reward = discounted_rewards_array.sum()

    if forward_steps >= self.frame_history:
        # The future state is made of the LAST frame_history successor frames,
        # i.e. the ones ending at t + forward_steps. Taking the first
        # frame_history successors would yield the state at t + frame_history,
        # which does not match a reward summed over forward_steps.
        newest_transitions = index_array[forward_steps - self.frame_history:]
        frame_indices = (newest_transitions + 1) % self.buffer_capacity

        # assumes state_buffer is laid out (frame, env, ..., channel) - TODO confirm
        stacked = np.moveaxis(self.state_buffer[frame_indices, env_idx], 0, -2)
        future_frame = stacked.reshape(past_frame.shape)
    else:
        # Fewer successors than history slots: keep the tail of the past frame
        # and append every successor frame after it
        frame_candidate = np.moveaxis(
            self.state_buffer[(index_array + 1) % self.buffer_capacity, env_idx], 0, -2
        )

        # Merge the two trailing axes of the stacked successors into one
        frame_candidate_target_shape = (
            list(frame_candidate.shape[:-2]) + [frame_candidate.shape[-2] * frame_candidate.shape[-1]]
        )

        # Negative start index: keeps only the trailing part of the past frame
        # that is not replaced by successor frames
        kept_from = frame_candidate_target_shape[-1] - past_frame.shape[-1]

        future_frame = np.concatenate([
            past_frame[..., kept_from:],
            frame_candidate.reshape(frame_candidate_target_shape)
        ], -1)

    return past_frame, future_frame, reward, done
def get_frame_with_future(self, frame_idx, env_idx):
    """
    Return the frame at ``frame_idx`` for ``env_idx`` together with its successor.

    :param frame_idx: position of the frame in the buffer
    :param env_idx: second index into ``self.state_buffer`` / ``self.dones_buffer``
    :return: tuple ``(past_frame, future_frame)``; ``future_frame`` is all
        zeroes when the frame is flagged in ``self.dones_buffer``
    :raises VelException: if ``frame_idx`` equals ``self.current_idx``
    """
    if frame_idx == self.current_idx:
        raise VelException("Cannot provide enough future for the frame")

    past_frame = self.get_frame(frame_idx, env_idx)

    if self.dones_buffer[frame_idx, env_idx]:
        # We are done - there is no meaningful successor
        return past_frame, np.zeros_like(past_frame)

    # We're not done - fetch the slot that follows, wrapping around the buffer
    successor_idx = (frame_idx + 1) % self.buffer_capacity
    next_frame = self.state_buffer[successor_idx, env_idx]

    if self.frame_history > 1:
        # Slide the history window: drop the first element along the last axis
        # and append the successor frame
        future_frame = np.concatenate([past_frame[..., 1:], next_frame], axis=-1)
    else:
        future_frame = next_frame

    return past_frame, future_frame
def result(self) -> dict:
    """
    Return the result of the epoch.

    :return: the value held in ``self._result``
    :raises VelException: if ``self._result`` is still ``None``
    """
    frozen = self._result

    if frozen is not None:
        return frozen

    raise VelException("Result has not been frozen yet")