def action_space(self):
    """Return the wrapped environment's action space, optionally normalized.

    When ``self._should_normalize_action`` is falsy, the wrapped
    environment's action space is returned untouched. Otherwise every
    continuous component is replaced by a ``Box`` spanning ``[-1, 1]`` with
    the same shape as the corresponding original component, while the
    discrete offensive decision stays ``Discrete(3)``.

    Returns
    -------
    tools.ActTuple
        (offensive decision, ball theta, offense DASH, defense DASH) when
        the wrapped space is an ``ActTuple``.
    tools.NDefActTuple
        (offensive decision, ball theta, offense DASH) when the wrapped
        space is an ``NDefActTuple``.
    None
        When normalization is enabled but the wrapped space is neither of
        the two tuple types above.
    """
    raw_space = self._env.action_space
    if not self._should_normalize_action:
        return raw_space

    def unit_box(component):
        # Same shape as the original component, bounds fixed to [-1, 1].
        shape = component.shape
        return spaces.Box(-np.ones(shape), np.ones(shape), dtype=np.float32)

    # The isinstance checks keep their original order (ActTuple first).
    if isinstance(raw_space, tools.ActTuple):
        # indices: 1 = ball theta, 2 = offense DASH, 3 = defense DASH
        tuple_cls, last_index = tools.ActTuple, 3
    elif isinstance(raw_space, tools.NDefActTuple):
        # indices: 1 = ball theta, 2 = offense DASH (no defense component)
        tuple_cls, last_index = tools.NDefActTuple, 2
    else:
        # NOTE(review): unsupported space types yield None, exactly as the
        # implicit fall-through did before -- confirm this is intended.
        return None

    components = [spaces.Discrete(3)]  # offensive decision
    for index in range(1, last_index + 1):
        components.append(unit_box(raw_space[index]))
    return tuple_cls(tuple(components))
def _set_action_space(self):
    """Build the action space using Cartesian (x, y) components.

    Returns
    -------
    tools.ActTuple
        ActTuple(Discrete(3), Box(2), Box(5, 2), Box(5, 2)), in order:
        offensive decision, ball direction bounded to [-1, 1], offense
        player DASH(x, y) and defense player DASH(x, y). Both DASH boxes
        are bounded to [-self.pl_max_power, self.pl_max_power].
    """
    power_limit = self.pl_max_power

    def dash_box():
        # DASH(x, y) box of shape (5, 2), symmetric around zero.
        return spaces.Box(
            low=-power_limit,
            high=power_limit,
            shape=(5, 2),
            dtype=np.float32,
        )

    offensive_decision = spaces.Discrete(3)
    ball_dir = spaces.Box(low=-1, high=1, shape=(2, ), dtype=np.float32)
    offense_dash = dash_box()
    defense_dash = dash_box()
    return tools.ActTuple(
        (offensive_decision, ball_dir, offense_dash, defense_dash))
def _set_action_space(self):
    """Build the action space using polar (power, direction) components.

    Returns
    -------
    tools.ActTuple
        ActTuple(Discrete(3), Box(), Box(5, 2), Box(5, 2)), in order:
        offensive decision, ball theta in [-pi, pi], offense player
        DASH(power, direction) and defense player DASH(power, direction).
        Each DASH row is bounded by [0, -pi] below and
        [self.pl_max_power, pi] above.
    """
    def polar_dash_box():
        # Fresh bound arrays on every call so the two boxes share nothing.
        lower = np.array([[0, -np.pi]] * 5, dtype=np.float32)
        upper = np.array([[self.pl_max_power, np.pi]] * 5, dtype=np.float32)
        return spaces.Box(low=lower, high=upper, dtype=np.float32)

    offensive_decision = spaces.Discrete(3)
    ball_theta = spaces.Box(low=-np.pi, high=np.pi, shape=(), dtype=np.float32)
    offense_dash = polar_dash_box()
    defense_dash = polar_dash_box()
    return tools.ActTuple(
        (offensive_decision, ball_theta, offense_dash, defense_dash))