def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None,
             num_workers=0, collate_fn=default_collate, pin_memory=False, drop_last=False,
             seed=None, worker_init_fn=None, worker_init_args=None, worker_init_kwargs=None):
    super().__init__(dataset, batch_size=batch_size, shuffle=shuffle, sampler=sampler,
                     batch_sampler=batch_sampler, num_workers=num_workers, collate_fn=collate_fn,
                     pin_memory=pin_memory, drop_last=drop_last)

    self.worker_init_fn = worker_init_fn

    if num_workers > 0:
        self.seed_generator = gen_rng(seed)
        # Per-worker extra arguments for worker_init_fn: one tuple / dict per worker.
        self.worker_init_args = worker_init_args if worker_init_args is not None else [tuple() for _ in range(num_workers)]
        self.worker_init_kwargs = worker_init_kwargs if worker_init_kwargs is not None else [{} for _ in range(num_workers)]
    else:
        # Single-process loading: a single tuple / dict suffices.
        self.worker_init_args = worker_init_args if worker_init_args is not None else tuple()
        self.worker_init_kwargs = worker_init_kwargs if worker_init_kwargs is not None else {}
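# Usage sketch for the loader above. This is a minimal, hedged example: the class
# name `JacDataLoader` and the convention that worker i receives
# worker_init_args[i] / worker_init_kwargs[i] when worker_init_fn is invoked are
# assumptions, not shown in the code above.
#
# import torch
# from torch.utils.data import TensorDataset
#
# def init_worker(worker_id, log_prefix, verbose=False):
#     # Seed third-party libraries or open per-worker resources here.
#     print(f'{log_prefix}: initializing worker {worker_id} (verbose={verbose})')
#
# dataset = TensorDataset(torch.arange(100, dtype=torch.float32))
# loader = JacDataLoader(
#     dataset, batch_size=32, shuffle=True, num_workers=4, seed=42,
#     worker_init_fn=init_worker,
#     worker_init_args=[(f'worker-{i}',) for i in range(4)],       # one tuple per worker
#     worker_init_kwargs=[{'verbose': i == 0} for i in range(4)],  # one dict per worker
# )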
def __init__(self, action_meanings=None):
    self.__rng = random.gen_rng()
    self._action_meanings = action_meanings
def __init__(self, map_size=14, visible_size=None, obs_ratio=0.3, enable_path_checking=True,
             random_action_mapping=None, enable_noaction=False, dense_reward=False,
             reward_move=None, reward_noaction=0, reward_final=10, reward_error=-2,
             state_mode='DEFAULT'):
    """
    :param map_size: A single int or a tuple (h, w), representing the map size.
    :param visible_size: A single int or a tuple (h, w), representing the visible size.
        The agent will be at the center of the visible window, and the out-of-border
        part will be colored with the obstacle color.
    :param obs_ratio: Obstacle ratio (how many obstacles will be in the map).
    :param enable_path_checking: Enable path computation during map construction.
        Turn it off only when you are sure the generated maze is valid.
    :param random_action_mapping: Whether to enable random action mapping. If enabled,
        the effect of every action will be shuffled. If a single bool True is provided,
        a random shuffle is used; otherwise, it should be a list with the same length
        as the action space (5 when no-action is enabled, 4 otherwise).
    :param enable_noaction: Whether to enable the no-action operation.
    :param dense_reward: Whether the reward is dense.
    :param reward_move: Reward for a valid move. In the dense-reward setting it should
        be a positive number, while in the sparse-reward setting it is expected to be
        non-positive.
    :param reward_noaction: Reward for a no-action.
    :param reward_final: Reward when the agent arrives at the final point.
    :param reward_error: Reward when the agent performs an invalid move.
    :param state_mode: State mode, either 'DEFAULT' or 'RENDER'.
    """
    super().__init__()

    self._rng = random.gen_rng()
    self._map_size = get_2dshape(map_size)
    self._visible_size = visible_size
    self._enable_path_checking = enable_path_checking
    if self._visible_size is not None:
        self._visible_size = get_2dshape(self._visible_size)
    self._obs_ratio = obs_ratio

    if enable_noaction:
        self._action_space = DiscreteActionSpace(5, action_meanings=['NOOP', 'UP', 'RIGHT', 'DOWN', 'LEFT'])
        self._action_delta = [(0, 0), (-1, 0), (0, 1), (1, 0), (0, -1)]
        self._action_mapping = [0, 1, 2, 3, 4]
    else:
        self._action_space = DiscreteActionSpace(4, action_meanings=['UP', 'RIGHT', 'DOWN', 'LEFT'])
        self._action_delta = [(-1, 0), (0, 1), (1, 0), (0, -1)]
        self._action_mapping = [0, 1, 2, 3]

    if random_action_mapping is not None:
        if random_action_mapping is True:
            self._rng.shuffle(self._action_mapping)
        else:
            assert len(self._action_mapping) == len(random_action_mapping)
            self._action_mapping = random_action_mapping

    self._enable_noaction = enable_noaction
    self._dense_reward = dense_reward
    if reward_move is None:
        reward_move = -1 if not dense_reward else 1
    self._rewards = (reward_move, reward_noaction, reward_final, reward_error)

    assert state_mode in ('DEFAULT', 'RENDER')
    self._state_mode = state_mode
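# Construction sketch for the environment above. Hedged example: the class name
# `MazeEnv` is an assumption (the enclosing class is not shown); only the keyword
# arguments come from the signature above.
#
# env = MazeEnv(
#     map_size=(14, 14),       # h x w map
#     visible_size=7,          # 7x7 egocentric window centered on the agent
#     obs_ratio=0.3,           # roughly 30% of cells become obstacles
#     enable_noaction=True,    # adds NOOP; action space size becomes 5
#     random_action_mapping=[0, 2, 1, 4, 3],  # fixed permutation of the 5 actions
#     dense_reward=False,      # sparse rewards: reward_move defaults to -1
#     reward_final=10,
#     reward_error=-2,
#     state_mode='DEFAULT',
# )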
def _initialize(self):
    self._rng = gen_rng(seed=self._seed)