Example #1
    def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None,
                 num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False, seed=None,
                 worker_init_fn=None, worker_init_args=None, worker_init_kwargs=None):
        super().__init__(dataset, batch_size=batch_size, shuffle=shuffle, sampler=sampler, batch_sampler=batch_sampler,
                         num_workers=num_workers, collate_fn=collate_fn, pin_memory=pin_memory, drop_last=drop_last)
        self.worker_init_fn = worker_init_fn

        if num_workers > 0:
            # Multi-process loading: keep a seed generator for the workers
            # and one (args, kwargs) pair per worker.
            self.seed_generator = gen_rng(seed)
            self.worker_init_args = worker_init_args if worker_init_args is not None else [tuple() for _ in range(num_workers)]
            self.worker_init_kwargs = worker_init_kwargs if worker_init_kwargs is not None else [{} for _ in range(num_workers)]
        else:
            # Single-process loading: a single (args, kwargs) pair suffices.
            self.worker_init_args = worker_init_args if worker_init_args is not None else tuple()
            self.worker_init_kwargs = worker_init_kwargs if worker_init_kwargs is not None else {}
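
All of the examples on this page obtain their random state from gen_rng, whose definition is not shown here. A minimal stand-in, under the assumption (not confirmed by the source) that it simply wraps numpy.random.RandomState:

import numpy as np

def gen_rng(seed=None):
    # Assumed behavior: return an independent RandomState; with seed=None,
    # numpy seeds it from OS entropy.
    return np.random.RandomState(seed)

Under this assumption, the seed_generator above can hand out independent per-worker seeds, e.g. seed_generator.randint(2 ** 31) once per worker (a hypothetical call, not shown in the source).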
Example #2
    def __init__(self, action_meanings=None):
        self.__rng = random.gen_rng()
        self._action_meanings = action_meanings
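
A private RNG like this is typically used to sample actions. A self-contained sketch under the same RandomState assumption; the class name ActionSpace and its sample method are hypothetical, not from the source:

import numpy as np

def gen_rng(seed=None):
    # Stand-in for the library's gen_rng (see the sketch under Example #1).
    return np.random.RandomState(seed)

class ActionSpace:
    def __init__(self, action_meanings=None):
        self._rng = gen_rng()
        self._action_meanings = action_meanings

    def sample(self):
        # Draw a uniformly random action index.
        return self._rng.randint(len(self._action_meanings))

space = ActionSpace(['NOOP', 'UP', 'RIGHT', 'DOWN', 'LEFT'])
print(space.sample())  # e.g. 3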
Example #3
    def __init__(self, map_size=14, visible_size=None, obs_ratio=0.3, enable_path_checking=True,
                 random_action_mapping=None,
                 enable_noaction=False, dense_reward=False,
                 reward_move=None, reward_noaction=0, reward_final=10, reward_error=-2, state_mode='DEFAULT'):
        """
        :param map_size: A single int or a tuple (h, w), representing the map size.
        :param visible_size: A single int or a tuple (h, w), representing the visible size. The agent will be at the
            center of the visible window, and the out-of-border part will be colored with the obstacle color.

        :param obs_ratio: Obstacle ratio (the fraction of the map occupied by obstacles).
        :param enable_path_checking: Enable path computation during map construction. Turn it off only when you are
            sure the maze is valid.

        :param random_action_mapping: Whether to enable random action mapping. If enabled, the effect of performing
            each action is shuffled. If a single bool True is provided, the mapping is shuffled randomly. Otherwise, it
            should be a list of the same length as the action space (5 when no-action is enabled, 4 otherwise).

        :param enable_noaction: Whether to enable no-action operation.
        :param dense_reward: Whether the reward is dense.
        :param reward_move: Reward for a valid move. In the dense reward setting it should be a positive number, while
            in the sparse reward setting it is expected to be non-positive.

        :param reward_noaction: Reward for a no-action.
        :param reward_final: Reward when you arrive at the final point.
        :param reward_error: Reward when you perform an invalid move.
        :param state_mode: State mode, either 'DEFAULT' or 'RENDER'.
        """

        super().__init__()
        self._rng = random.gen_rng()
        self._map_size = get_2dshape(map_size)
        self._visible_size = visible_size
        self._enable_path_checking = enable_path_checking
        if self._visible_size is not None:
            self._visible_size = get_2dshape(self._visible_size)

        self._obs_ratio = obs_ratio

        if enable_noaction:
            self._action_space = DiscreteActionSpace(5, action_meanings=['NOOP', 'UP', 'RIGHT', 'DOWN', 'LEFT'])
            self._action_delta = [(0, 0), (-1, 0), (0, 1), (1, 0), (0, -1)]
            self._action_mapping = [0, 1, 2, 3, 4]
        else:
            self._action_space = DiscreteActionSpace(4, action_meanings=['UP', 'RIGHT', 'DOWN', 'LEFT'])
            self._action_delta = [(-1, 0), (0, 1), (1, 0), (0, -1)]
            self._action_mapping = [0, 1, 2, 3]

        if random_action_mapping is not None:
            if random_action_mapping is True:
                self._rng.shuffle(self._action_mapping)
            else:
                assert len(self._action_mapping) == len(random_action_mapping)
                self._action_mapping = random_action_mapping

        self._enable_noaction = enable_noaction
        self._dense_reward = dense_reward
        if reward_move is None:
            reward_move = -1 if not dense_reward else 1
        self._rewards = (reward_move, reward_noaction, reward_final, reward_error)
        assert state_mode in ('DEFAULT', 'RENDER')
        self._state_mode = state_mode
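
The random_action_mapping branch is the only place this constructor consumes randomness. A self-contained sketch of what that shuffle does to the action semantics (stand-in gen_rng as above; the printed values depend on the seed):

import numpy as np

def gen_rng(seed=None):
    # Stand-in for the library's gen_rng (RandomState assumption).
    return np.random.RandomState(seed)

rng = gen_rng(0)
action_mapping = [0, 1, 2, 3]          # UP, RIGHT, DOWN, LEFT
rng.shuffle(action_mapping)            # the random_action_mapping=True branch
print(action_mapping)                  # e.g. [2, 0, 1, 3]

# A chosen action index is routed through the mapping before its movement
# delta is applied, so the agent must discover what each action really does.
action_delta = [(-1, 0), (0, 1), (1, 0), (0, -1)]
chosen = 0                             # agent asks for 'UP'
print(action_delta[action_mapping[chosen]])  # but it may move another way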
Example #4
    def _initialize(self):
        # Regenerate the random state from the stored seed.
        self._rng = gen_rng(seed=self._seed)
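
Example #4 shows the re-seeding pattern: the seed is stored once and the RNG is rebuilt on every (re-)initialization, so repeated resets replay the same random sequence. A minimal self-contained sketch under the RandomState assumption; the class ReproducibleEnv is hypothetical:

import numpy as np

def gen_rng(seed=None):
    # Stand-in for the library's gen_rng (RandomState assumption).
    return np.random.RandomState(seed)

class ReproducibleEnv:
    def __init__(self, seed=None):
        self._seed = seed
        self._initialize()

    def _initialize(self):
        # Rebuilding the rng from the stored seed makes every
        # re-initialization reproduce the same draws.
        self._rng = gen_rng(seed=self._seed)

env = ReproducibleEnv(seed=42)
first = env._rng.rand()
env._initialize()                  # reset
assert env._rng.rand() == first    # same first draw after re-init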