Code example #1
    def __init__(self,
                 max_replay_buffer_size,
                 env,
                 env_info_sizes=None,
                 probability_function='Linear',
                 steps_to_end=10,
                 threshold=0.7):
        """
        :param max_replay_buffer_size:
        :param env:
        :param env_info_sizes:
        :param probability_function:
        :param steps_to_end:
        :param threshold:
        """
        self.env = env
        self._ob_space = env.observation_space
        self._action_space = env.action_space
        self._probability_function = probability_function
        self._steps_to_end = steps_to_end
        self._threshold = threshold

        if env_info_sizes is None:
            if hasattr(env, 'info_sizes'):
                env_info_sizes = env.info_sizes
            else:
                env_info_sizes = dict()

        super().__init__(max_replay_buffer_size=max_replay_buffer_size,
                         observation_dim=get_dim(self._ob_space),
                         action_dim=get_dim(self._action_space),
                         env_info_sizes=env_info_sizes)
        self._terminals = np.zeros((max_replay_buffer_size, 1))
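For reference, here is one way this constructor could be called. This is a usage sketch only: the class name LinearTerminationReplayBuffer and the Pendulum environment are placeholders, since the listing above shows nothing beyond the __init__ signature.

import gym

# Hypothetical class name; only the constructor is shown above.
replay_buffer = LinearTerminationReplayBuffer(
    max_replay_buffer_size=int(1e6),
    env=gym.make('Pendulum-v1'),
    probability_function='Linear',  # default named in the signature
    steps_to_end=10,
    threshold=0.7,
)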
Code example #2
    def __init__(
        self,
        max_replay_buffer_size,
        env,
        env_info_sizes=None,
        buffer_path='',
        preload_buffer=False,
        priority=False,
    ):
        """
        :param max_replay_buffer_size:
        :param env:
        """
        self.env = env
        self._ob_space = env.observation_space
        self._action_space = env.action_space

        if env_info_sizes is None:
            if hasattr(env, 'info_sizes'):
                env_info_sizes = env.info_sizes
            else:
                env_info_sizes = dict()

        super().__init__(
            max_replay_buffer_size=max_replay_buffer_size,
            observation_dim=get_dim(self._ob_space),
            action_dim=get_dim(self._action_space),
            env_info_sizes=env_info_sizes,
            preload_buffer=preload_buffer,
            path=buffer_path,
            priority=priority,
        )
Code example #3
    def __init__(
            self,
            max_replay_buffer_size,
            env,
            env_info_sizes=None,
            dtype='float32'
    ):
        """
        :param max_replay_buffer_size:
        :param env:
        """
        self.env = env
        self._ob_space = env.observation_space
        self._action_space = env.action_space

        if env_info_sizes is None:
            if hasattr(env, 'info_sizes'):
                env_info_sizes = env.info_sizes
            else:
                env_info_sizes = dict()

        super().__init__(
            max_replay_buffer_size=max_replay_buffer_size,
            observation_dim=get_dim(self._ob_space),
            action_dim=get_dim(self._action_space),
            env_info_sizes=env_info_sizes,
            dtype=dtype
        )
Code example #4
    def __init__(
            self,
            max_replay_buffer_size,
            env,
            alpha=0.6,
            beta=0.4,
            env_info_sizes=None,
            **kwargs
    ):
        """
        :param max_replay_buffer_size:
        :param env:
        """
        self.env = env
        self._ob_space = env.observation_space
        self._action_space = env.action_space

        if env_info_sizes is None:
            if hasattr(env, 'info_sizes'):
                env_info_sizes = env.info_sizes
            else:
                env_info_sizes = dict()

        super().__init__(
            max_replay_buffer_size=max_replay_buffer_size,
            observation_dim=get_dim(self._ob_space),
            action_dim=get_dim(self._action_space),
            env_info_sizes=env_info_sizes,
            **kwargs
        )
        self.alpha = alpha
        self.beta = beta
        self._priorities = np.zeros((max_replay_buffer_size,))
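The constructor above only allocates the per-transition priority array; the sampling step is not shown. As a reference for how alpha, beta, and _priorities are typically combined in proportional prioritized experience replay, here is a self-contained sketch. It is not taken from the class above, and the batch_size argument and the assumption of strictly positive priorities are mine.

import numpy as np

def sample_prioritized(priorities, size, alpha, beta, batch_size, rng=None):
    # Proportional PER: P(i) ~ p_i**alpha, importance weight
    # w_i = (size * P(i))**(-beta), normalised by the largest weight.
    # Assumes priorities[:size] > 0 (new transitions usually receive the
    # current maximum priority rather than zero).
    rng = np.random.default_rng() if rng is None else rng
    probs = priorities[:size] ** alpha
    probs = probs / probs.sum()
    indices = rng.choice(size, size=batch_size, p=probs)
    weights = (size * probs[indices]) ** (-beta)
    weights = weights / weights.max()
    return indices, weights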
Code example #5
    def __init__(
            self,
            max_replay_buffer_size,
            env,
            goal_period,  # time btwn goals (length of low-level traj)
            env_info_sizes=None,
            dtype='float32'):
        """
        :param max_replay_buffer_size:
        :param env:
        """
        self.env = env
        self._ob_space = env.observation_space
        self._action_space = env.goal_space

        if env_info_sizes is None:
            if hasattr(env, 'info_sizes'):
                env_info_sizes = env.info_sizes
            else:
                env_info_sizes = dict()

        super().__init__(max_replay_buffer_size=max_replay_buffer_size,
                         observation_dim=get_dim(self._ob_space),
                         action_dim=get_dim(self._action_space),
                         env_info_sizes=env_info_sizes,
                         dtype=dtype)

        # assumes low level gets same obs as high level
        self._traj_obs = np.zeros(
            (max_replay_buffer_size, goal_period, get_dim(self._ob_space)))
        if isinstance(env.action_space, Discrete):
            self._traj_acs = np.zeros((max_replay_buffer_size, goal_period))
        else:
            self._traj_acs = np.zeros((max_replay_buffer_size, goal_period,
                                       get_dim(env.action_space)))
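The two trajectory arrays give each high-level transition room for the goal_period low-level steps taken between goals. Below is a minimal sketch of how a low-level trajectory might be written into them; the helper name and the top index argument are assumptions, since the corresponding add method is not shown in the listing.

# Hypothetical helper, not part of the class above.
def store_low_level_traj(buffer, top, traj_obs, traj_acs):
    # traj_obs: (goal_period, obs_dim)
    # traj_acs: (goal_period,) for Discrete action spaces,
    #           (goal_period, act_dim) otherwise, matching the arrays above.
    buffer._traj_obs[top] = traj_obs
    buffer._traj_acs[top] = traj_acs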
Code example #6
    def __init__(
            self,
            max_replay_buffer_size,
            env,
            num_ensemble,
            log_dir,
            env_info_sizes=None
    ):
        """
        :param max_replay_buffer_size:
        :param env:
        """
        self.env = env
        self._ob_space = env.observation_space
        self._action_space = env.action_space

        if env_info_sizes is None:
            if hasattr(env, 'info_sizes'):
                env_info_sizes = env.info_sizes
            else:
                env_info_sizes = dict()

        super().__init__(
            max_replay_buffer_size=max_replay_buffer_size,
            observation_dim=get_dim(self._ob_space),
            action_dim=get_dim(self._action_space),
            env_info_sizes=env_info_sizes,
            num_ensemble=num_ensemble,
            log_dir=log_dir,
        )
Code example #7
    def __init__(
            self,
            max_replay_buffer_size,
            env,
            single_flag,
            equal_flag,
            lower,
            upper,
            env_info_sizes=None
    ):
        """
        :param max_replay_buffer_size:
        :param env:
        """
        self.env = env
        self._ob_space = env.observation_space
        self._action_space = env.action_space

        if env_info_sizes is None:
            if hasattr(env, 'info_sizes'):
                env_info_sizes = env.info_sizes
            else:
                env_info_sizes = dict()

        super().__init__(
            max_replay_buffer_size=max_replay_buffer_size,
            observation_dim=get_dim(self._ob_space),
            action_dim=get_dim(self._action_space),
            env_info_sizes=env_info_sizes,
            single_flag=single_flag,
            equal_flag=equal_flag,
            lower=lower,
            upper=upper,
        )
Code example #8
    def create_buffer(self, size=None):
        if size is None:
            size = self.mini_buffer_max_size
        return RLKitSimpleReplayBuffer(
            max_replay_buffer_size=size,
            observation_dim=get_dim(self._ob_space),
            action_dim=get_dim(self._action_space),
            env_info_sizes=self.env_info_sizes,
        )
Code example #9
    def __init__(self, max_replay_buffer_size, env, env_info_sizes=None):
        observation_dim = get_dim(env.observation_space)
        action_dim = get_dim(env.action_space)

        if env_info_sizes is None:
            if hasattr(env, 'info_sizes'):
                env_info_sizes = env.info_sizes
            else:
                env_info_sizes = dict()

        self._max_replay_buffer_size = max_replay_buffer_size
        self._observations = torch.zeros(
            (max_replay_buffer_size, observation_dim),
            dtype=torch.float).pin_memory()
        # It's a bit memory inefficient to save the observations twice,
        # but it makes the code *much* easier since you no longer have to
        # worry about termination conditions.
        self._next_obs = torch.zeros((max_replay_buffer_size, observation_dim),
                                     dtype=torch.float).pin_memory()
        self._actions = torch.zeros((max_replay_buffer_size, action_dim),
                                    dtype=torch.float).pin_memory()
        # Make everything a 2D np array to make it easier for other code to
        # reason about the shape of the data
        self._rewards = torch.zeros((max_replay_buffer_size, 1),
                                    dtype=torch.float).pin_memory()
        # self._terminals[i] = a terminal was received at time i
        self._terminals = torch.zeros((max_replay_buffer_size, 1),
                                      dtype=torch.float).pin_memory()
        # Define self._env_infos[key][i] to be the return value of env_info[key]
        # at time i
        self._env_infos = {}
        for key, size in env_info_sizes.items():
            self._env_infos[key] = torch.zeros((max_replay_buffer_size, size),
                                               dtype=torch.float).pin_memory()
        self._env_info_keys = env_info_sizes.keys()

        self._top = 0
        self._size = 0

        if ptu.gpu_enabled():
            # self.stream = torch.cuda.Stream(ptu.device)
            self.batch = None
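Everything here is allocated in pinned (page-locked) host memory, which is what makes an asynchronous copy to the GPU worthwhile when a batch is later assembled. A minimal sketch of that transfer pattern follows; the batch_to_device helper is illustrative and not part of the class, and it assumes the batch is a dict of pinned CPU tensors like those above.

import torch

def batch_to_device(batch, device):
    # non_blocking=True lets the host-to-device copies overlap with GPU
    # computation; this only helps because the source tensors are pinned.
    return {k: v.to(device, non_blocking=True) for k, v in batch.items()}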
Code example #10
    def __init__(self,
                 max_replay_buffer_size,
                 env,
                 env_info_sizes=None,
                 dtype='float32'):
        super().__init__(max_replay_buffer_size, env, env_info_sizes, dtype)

        self._goal_space = env.goal_space
        self._goals = np.zeros(
            (self._max_replay_buffer_size, get_dim(self._goal_space)),
            dtype=self.dtype)
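Beyond the parent buffer's arrays, this variant keeps a parallel _goals array with one row per stored transition. A short sketch of how a goal-conditioned input could be assembled from it, assuming the parent class stores observations in _observations as rlkit's SimpleReplayBuffer does; the function itself is illustrative and not part of the class.

import numpy as np

def goal_conditioned_obs(buffer, indices):
    # Concatenate each sampled observation with the goal stored at the
    # same index, a common input format for goal-conditioned policies.
    obs = buffer._observations[indices]
    goals = buffer._goals[indices]
    return np.concatenate([obs, goals], axis=1)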
Code example #11
    def __init__(self, max_replay_buffer_size, env, env_info_sizes=None):
        """
        :param max_replay_buffer_size:
        :param env:
        """
        self.env = env
        self._node_dim = env.dim_act
        self._ob_space = env.observation_space
        self._action_space = env.action_space

        if env_info_sizes is None:
            if hasattr(env, 'info_sizes'):
                env_info_sizes = env.info_sizes
            else:
                env_info_sizes = dict()

        super().__init__(max_replay_buffer_size=max_replay_buffer_size,
                         node_dim=self._node_dim,
                         node_feature_dim=get_dim(self._ob_space),
                         action_dim=get_dim(self._action_space),
                         env_info_sizes=env_info_sizes)
Code example #12
    def __init__(
        self,
        max_replay_buffer_size,
        env,
        attr_dict=None,
        env_info_sizes=None,
    ):
        """
        :param max_replay_buffer_size:
        :param env:
        """

        if attr_dict:
            self.env = attr_dict['env']
            self._ob_space = attr_dict['ob_space']
            self._action_space = attr_dict['action_space']

        else:
            self.env = env
            self._ob_space = env.observation_space
            self._action_space = env.action_space

        if env_info_sizes is None:
            if hasattr(env, 'info_sizes'):
                env_info_sizes = env.info_sizes
            else:
                env_info_sizes = dict()

        super().__init__(max_replay_buffer_size=max_replay_buffer_size,
                         observation_dim=get_dim(self._ob_space),
                         action_dim=get_dim(self._action_space),
                         env_info_sizes=env_info_sizes,
                         attr_dict=attr_dict)
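A minimal usage sketch for the attr_dict path: when the dict is supplied, the constructor reads exactly the three keys shown below instead of the env's own spaces. The class name AttrDictReplayBuffer is a placeholder, since the listing only shows the constructor.

import gym

env = gym.make('Pendulum-v1')
attr_dict = {
    'env': env,
    'ob_space': env.observation_space,
    'action_space': env.action_space,
}
# Hypothetical class name; keys match those read in __init__ above.
replay_buffer = AttrDictReplayBuffer(
    max_replay_buffer_size=int(1e6),
    env=env,
    attr_dict=attr_dict,
)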