def __init__(self, max_replay_buffer_size, env, env_info_sizes=None,
             probability_function='Linear', steps_to_end=10, threshold=0.7):
    """Replay buffer sized from the env's spaces, with a sampling scheme.

    :param max_replay_buffer_size: maximum number of transitions stored
    :param env: environment supplying ``observation_space`` / ``action_space``
    :param env_info_sizes: mapping of env-info key -> size; falls back to
        ``env.info_sizes`` when the env provides one, else an empty dict
    :param probability_function: name of the sampling-probability scheme
    :param steps_to_end: horizon parameter used by that scheme
    :param threshold: cutoff parameter used by that scheme
    """
    self.env = env
    self._ob_space = env.observation_space
    self._action_space = env.action_space
    self._probability_function = probability_function
    self._steps_to_end = steps_to_end
    self._threshold = threshold
    # Prefer sizes advertised by the env itself when none were given.
    if env_info_sizes is None:
        env_info_sizes = getattr(env, 'info_sizes', dict())
    super().__init__(
        max_replay_buffer_size=max_replay_buffer_size,
        observation_dim=get_dim(self._ob_space),
        action_dim=get_dim(self._action_space),
        env_info_sizes=env_info_sizes,
    )
    # Re-allocate the terminal flags after the parent init, shape (capacity, 1).
    self._terminals = np.zeros((max_replay_buffer_size, 1))
def __init__(
        self,
        max_replay_buffer_size,
        env,
        env_info_sizes=None,
        buffer_path='',
        preload_buffer=False,
        priority=False,
):
    """Replay buffer that can be preloaded from disk and use priorities.

    Fix: the parameter was previously spelled ``priorite`` while the
    ``super().__init__`` call passed ``priority=priority`` — an undefined
    name, so every construction raised ``NameError``. The parameter is now
    named ``priority`` as the call site clearly intended.

    :param max_replay_buffer_size: maximum number of transitions stored
    :param env: environment supplying ``observation_space`` / ``action_space``
    :param env_info_sizes: mapping of env-info key -> size; falls back to
        ``env.info_sizes`` when present, else an empty dict
    :param buffer_path: path handed to the parent as ``path`` for preloading
    :param preload_buffer: whether the parent should load from ``buffer_path``
    :param priority: whether the parent enables prioritized sampling
    """
    self.env = env
    self._ob_space = env.observation_space
    self._action_space = env.action_space
    if env_info_sizes is None:
        if hasattr(env, 'info_sizes'):
            env_info_sizes = env.info_sizes
        else:
            env_info_sizes = dict()
    super().__init__(
        max_replay_buffer_size=max_replay_buffer_size,
        observation_dim=get_dim(self._ob_space),
        action_dim=get_dim(self._action_space),
        env_info_sizes=env_info_sizes,
        preload_buffer=preload_buffer,
        path=buffer_path,
        priority=priority,
    )
def __init__(
        self,
        max_replay_buffer_size,
        env,
        env_info_sizes=None,
        dtype='float32'
):
    """Replay buffer sized from the env's spaces with a configurable dtype.

    :param max_replay_buffer_size: maximum number of transitions stored
    :param env: environment supplying ``observation_space`` / ``action_space``
    :param env_info_sizes: mapping of env-info key -> size; falls back to
        ``env.info_sizes`` when present, else an empty dict
    :param dtype: storage dtype forwarded to the parent buffer
    """
    self.env = env
    self._ob_space = env.observation_space
    self._action_space = env.action_space
    # Prefer sizes advertised by the env itself when none were given.
    if env_info_sizes is None:
        env_info_sizes = getattr(env, 'info_sizes', dict())
    super().__init__(
        max_replay_buffer_size=max_replay_buffer_size,
        observation_dim=get_dim(self._ob_space),
        action_dim=get_dim(self._action_space),
        env_info_sizes=env_info_sizes,
        dtype=dtype,
    )
def __init__(
        self,
        max_replay_buffer_size,
        env,
        alpha=0.6,
        beta=0.4,
        env_info_sizes=None,
        **kwargs
):
    """Prioritized replay buffer sized from the env's spaces.

    :param max_replay_buffer_size: maximum number of transitions stored
    :param env: environment supplying ``observation_space`` / ``action_space``
    :param alpha: prioritization exponent stored on the instance
    :param beta: importance-sampling exponent stored on the instance
    :param env_info_sizes: mapping of env-info key -> size; falls back to
        ``env.info_sizes`` when present, else an empty dict
    :param kwargs: forwarded unchanged to the parent constructor
    """
    self.env = env
    self._ob_space = env.observation_space
    self._action_space = env.action_space
    # Prefer sizes advertised by the env itself when none were given.
    if env_info_sizes is None:
        env_info_sizes = getattr(env, 'info_sizes', dict())
    super().__init__(
        max_replay_buffer_size=max_replay_buffer_size,
        observation_dim=get_dim(self._ob_space),
        action_dim=get_dim(self._action_space),
        env_info_sizes=env_info_sizes,
        **kwargs
    )
    self.alpha = alpha
    self.beta = beta
    # One priority score per slot (1-D, unlike the 2-D transition arrays).
    self._priorities = np.zeros((max_replay_buffer_size,))
def __init__(
        self,
        max_replay_buffer_size,
        env,
        goal_period,  # time btwn goals (length of low-level traj)
        env_info_sizes=None,
        dtype='float32'):
    """High-level replay buffer whose "actions" are goals for a low level.

    :param max_replay_buffer_size: maximum number of transitions stored
    :param env: environment supplying ``observation_space`` / ``goal_space``
    :param goal_period: steps between goals, i.e. low-level trajectory length
    :param env_info_sizes: mapping of env-info key -> size; falls back to
        ``env.info_sizes`` when present, else an empty dict
    :param dtype: storage dtype forwarded to the parent buffer
    """
    self.env = env
    self._ob_space = env.observation_space
    # The high-level policy emits goals, so the goal space plays the role
    # of this buffer's action space.
    self._action_space = env.goal_space
    if env_info_sizes is None:
        env_info_sizes = getattr(env, 'info_sizes', dict())
    super().__init__(
        max_replay_buffer_size=max_replay_buffer_size,
        observation_dim=get_dim(self._ob_space),
        action_dim=get_dim(self._action_space),
        env_info_sizes=env_info_sizes,
        dtype=dtype)
    # assumes low level gets same obs as high level
    self._traj_obs = np.zeros(
        (max_replay_buffer_size, goal_period, get_dim(self._ob_space)))
    # Discrete low-level actions are stored as scalars; continuous ones
    # keep their full action dimension.
    if isinstance(env.action_space, Discrete):
        ac_shape = (max_replay_buffer_size, goal_period)
    else:
        ac_shape = (max_replay_buffer_size, goal_period,
                    get_dim(env.action_space))
    self._traj_acs = np.zeros(ac_shape)
def __init__(
        self,
        max_replay_buffer_size,
        env,
        num_ensemble,
        log_dir,
        env_info_sizes=None
):
    """Replay buffer for an ensemble of learners, sized from the env's spaces.

    :param max_replay_buffer_size: maximum number of transitions stored
    :param env: environment supplying ``observation_space`` / ``action_space``
    :param num_ensemble: ensemble size forwarded to the parent
    :param log_dir: log directory forwarded to the parent
    :param env_info_sizes: mapping of env-info key -> size; falls back to
        ``env.info_sizes`` when present, else an empty dict
    """
    self.env = env
    self._ob_space = env.observation_space
    self._action_space = env.action_space
    # Prefer sizes advertised by the env itself when none were given.
    if env_info_sizes is None:
        env_info_sizes = getattr(env, 'info_sizes', dict())
    super().__init__(
        max_replay_buffer_size=max_replay_buffer_size,
        observation_dim=get_dim(self._ob_space),
        action_dim=get_dim(self._action_space),
        env_info_sizes=env_info_sizes,
        num_ensemble=num_ensemble,
        log_dir=log_dir,
    )
def __init__(
        self,
        max_replay_buffer_size,
        env,
        single_flag,
        equal_flag,
        lower,
        upper,
        env_info_sizes=None
):
    """Replay buffer with flag/bound configuration forwarded to the parent.

    :param max_replay_buffer_size: maximum number of transitions stored
    :param env: environment supplying ``observation_space`` / ``action_space``
    :param single_flag: forwarded unchanged to the parent constructor
    :param equal_flag: forwarded unchanged to the parent constructor
    :param lower: lower bound forwarded to the parent constructor
    :param upper: upper bound forwarded to the parent constructor
    :param env_info_sizes: mapping of env-info key -> size; falls back to
        ``env.info_sizes`` when present, else an empty dict
    """
    self.env = env
    self._ob_space = env.observation_space
    self._action_space = env.action_space
    # Prefer sizes advertised by the env itself when none were given.
    if env_info_sizes is None:
        env_info_sizes = getattr(env, 'info_sizes', dict())
    super().__init__(
        max_replay_buffer_size=max_replay_buffer_size,
        observation_dim=get_dim(self._ob_space),
        action_dim=get_dim(self._action_space),
        env_info_sizes=env_info_sizes,
        single_flag=single_flag,
        equal_flag=equal_flag,
        lower=lower,
        upper=upper,
    )
def create_buffer(self, size=None):
    """Build a fresh simple replay buffer mirroring this buffer's spaces.

    :param size: capacity of the new buffer; defaults to
        ``self.mini_buffer_max_size`` when omitted
    :return: a new ``RLKitSimpleReplayBuffer`` with matching dimensions
    """
    capacity = self.mini_buffer_max_size if size is None else size
    return RLKitSimpleReplayBuffer(
        max_replay_buffer_size=capacity,
        observation_dim=get_dim(self._ob_space),
        action_dim=get_dim(self._action_space),
        env_info_sizes=self.env_info_sizes,
    )
def __init__(self, max_replay_buffer_size, env, env_info_sizes=None):
    """Allocate pinned-memory torch tensors for all transition storage.

    Pinned (page-locked) host memory enables fast async host->GPU copies.

    :param max_replay_buffer_size: maximum number of transitions stored
    :param env: environment supplying observation_space / action_space
    :param env_info_sizes: mapping of env-info key -> size; falls back to
        env.info_sizes when present, else an empty dict
    """
    observation_dim = get_dim(env.observation_space)
    action_dim = get_dim(env.action_space)
    if env_info_sizes is None:
        if hasattr(env, 'info_sizes'):
            env_info_sizes = env.info_sizes
        else:
            env_info_sizes = dict()
    self._max_replay_buffer_size = max_replay_buffer_size
    self._observations = torch.zeros(
        (max_replay_buffer_size, observation_dim),
        dtype=torch.float).pin_memory()
    # It's a bit memory inefficient to save the observations twice,
    # but it makes the code *much* easier since you no longer have to
    # worry about termination conditions.
    self._next_obs = torch.zeros((max_replay_buffer_size, observation_dim),
                                 dtype=torch.float).pin_memory()
    self._actions = torch.zeros((max_replay_buffer_size, action_dim),
                                dtype=torch.float).pin_memory()
    # Make everything a 2D np array to make it easier for other code to
    # reason about the shape of the data
    self._rewards = torch.zeros((max_replay_buffer_size, 1),
                                dtype=torch.float).pin_memory()
    # self._terminals[i] = a terminal was received at time i
    self._terminals = torch.zeros((max_replay_buffer_size, 1),
                                  dtype=torch.float).pin_memory()
    # Define self._env_infos[key][i] to be the return value of env_info[key]
    # at time i
    self._env_infos = {}
    for key, size in env_info_sizes.items():
        self._env_infos[key] = torch.zeros((max_replay_buffer_size, size),
                                           dtype=torch.float).pin_memory()
    self._env_info_keys = env_info_sizes.keys()
    # _top: next write index; _size: current number of stored transitions.
    self._top = 0
    self._size = 0
    # NOTE(review): self.batch is only initialized when a GPU is available;
    # on CPU-only runs any later access to self.batch would raise
    # AttributeError — confirm callers guard on ptu.gpu_enabled() too.
    if ptu.gpu_enabled():
        # self.stream = torch.cuda.Stream(ptu.device)
        self.batch = None
def __init__(self, max_replay_buffer_size, env, env_info_sizes=None,
             dtype='float32'):
    """Extend the base env replay buffer with per-transition goal storage.

    :param max_replay_buffer_size: maximum number of transitions stored
    :param env: environment; must additionally expose ``goal_space``
    :param env_info_sizes: forwarded unchanged to the parent constructor
    :param dtype: storage dtype forwarded to the parent buffer
    """
    super().__init__(max_replay_buffer_size, env, env_info_sizes, dtype)
    self._goal_space = env.goal_space
    goal_dim = get_dim(self._goal_space)
    # One goal per stored transition, same dtype as the parent's arrays.
    self._goals = np.zeros((self._max_replay_buffer_size, goal_dim),
                           dtype=self.dtype)
def __init__(self, max_replay_buffer_size, env, env_info_sizes=None):
    """Graph-structured replay buffer sized from the env's spaces.

    :param max_replay_buffer_size: maximum number of transitions stored
    :param env: environment supplying ``observation_space``,
        ``action_space``, and ``dim_act`` (number of graph nodes)
    :param env_info_sizes: mapping of env-info key -> size; falls back to
        ``env.info_sizes`` when present, else an empty dict
    """
    self.env = env
    # Number of nodes in the graph; observations become per-node features.
    self._node_dim = env.dim_act
    self._ob_space = env.observation_space
    self._action_space = env.action_space
    # Prefer sizes advertised by the env itself when none were given.
    if env_info_sizes is None:
        env_info_sizes = getattr(env, 'info_sizes', dict())
    super().__init__(
        max_replay_buffer_size=max_replay_buffer_size,
        node_dim=self._node_dim,
        node_feature_dim=get_dim(self._ob_space),
        action_dim=get_dim(self._action_space),
        env_info_sizes=env_info_sizes,
    )
def __init__(
        self,
        max_replay_buffer_size,
        env,
        attr_dict=None,
        env_info_sizes=None,
):
    """Replay buffer whose spaces come from the env or a pre-built attr dict.

    Fix: removed a commented-out duplicate of the ``attr_dict`` branch that
    shadowed the live logic (and contained the typo ``self.attr_dict``) —
    dead code that could only mislead future edits. Behavior is unchanged.

    :param max_replay_buffer_size: maximum number of transitions stored
    :param env: environment supplying ``observation_space`` /
        ``action_space``; consulted only when ``attr_dict`` is falsy
    :param attr_dict: optional mapping with keys ``'env'``, ``'ob_space'``,
        ``'action_space'`` that overrides the env's spaces; also forwarded
        to the parent constructor
    :param env_info_sizes: mapping of env-info key -> size; falls back to
        ``env.info_sizes`` when present, else an empty dict
    """
    if attr_dict:
        self.env = attr_dict['env']
        self._ob_space = attr_dict['ob_space']
        self._action_space = attr_dict['action_space']
    else:
        self.env = env
        self._ob_space = env.observation_space
        self._action_space = env.action_space
    if env_info_sizes is None:
        if hasattr(env, 'info_sizes'):
            env_info_sizes = env.info_sizes
        else:
            env_info_sizes = dict()
    super().__init__(max_replay_buffer_size=max_replay_buffer_size,
                     observation_dim=get_dim(self._ob_space),
                     action_dim=get_dim(self._action_space),
                     env_info_sizes=env_info_sizes,
                     attr_dict=attr_dict)