Code Example #1
File: dqn-her.py  Project: ymd-h/cpprb
import gym
import numpy as np
from gym.spaces import Box, Discrete


class BitFlippingEnv(gym.Env):
    """
    bit-flipping environment: https://arxiv.org/abs/1707.01495

    * Environment has n-bit state.
    * Initial state and goal state are randomly selected.
    * Action is one of 0, ..., n-1; taking action ``k`` flips the ``k``-th bit
    * Reward is 0 if state == goal, otherwise reward is -1. (Sparse Binary Reward)

    Simple RL algorithms tend to fail for large ``n`` (e.g. ``n > 40``)
    """
    def __init__(self, n):
        seeds = np.random.SeedSequence().spawn(3)
        self.np_random = np.random.default_rng(seeds[0])
        self.observation_space = Box(low=0, high=1, shape=(n,), dtype=int)
        self.action_space = Discrete(n)
        self.observation_space.seed(seeds[1].entropy)
        self.action_space.seed(seeds[2].entropy)

    def step(self, action):
        action = int(action)
        self.bit[action] = 1 - self.bit[action]
        done = (self.bit == self.goal).all()
        rew = 0 if done else -1
        return self.bit.copy(), rew, done, {}

    def reset(self):
        self.bit = self.np_random.integers(low=0, high=1, size=self.action_space.n,
                                           endpoint=True, dtype=int)
        self.goal = self.np_random.integers(low=0, high=1, size=self.action_space.n,
                                            endpoint=True, dtype=int)
        return self.bit.copy()
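
A short random-rollout sketch for this environment; the step cap is needed because the episode only ends once the goal state is reached:

env = BitFlippingEnv(n=8)
obs = env.reset()
for t in range(200):  # cap steps: the env itself has no time limit
    obs, reward, done, _ = env.step(env.action_space.sample())
    if done:  # state matches the goal
        break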
Code Example #2
from gym.spaces import Box, Dict, Discrete


def test_seed_Dict():
    test_space = Dict(
        {
            "a": Box(low=0, high=1, shape=(3, 3)),
            "b": Dict(
                {
                    "b_1": Box(low=-100, high=100, shape=(2,)),
                    "b_2": Box(low=-1, high=1, shape=(2,)),
                }
            ),
            "c": Discrete(5),
        }
    )

    seed_dict = {
        "a": 0,
        "b": {
            "b_1": 1,
            "b_2": 2,
        },
        "c": 3,
    }

    test_space.seed(seed_dict)

    # "Unpack" the dict sub-spaces into individual spaces
    a = Box(low=0, high=1, shape=(3, 3))
    a.seed(0)
    b_1 = Box(low=-100, high=100, shape=(2,))
    b_1.seed(1)
    b_2 = Box(low=-1, high=1, shape=(2,))
    b_2.seed(2)
    c = Discrete(5)
    c.seed(3)

    for i in range(10):
        test_s = test_space.sample()
        a_s = a.sample()
        assert (test_s["a"] == a_s).all()
        b_1_s = b_1.sample()
        assert (test_s["b"]["b_1"] == b_1_s).all()
        b_2_s = b_2.sample()
        assert (test_s["b"]["b_2"] == b_2_s).all()
        c_s = c.sample()
        assert test_s["c"] == c_s
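
The test above implicitly relies on `seed()` fixing the sample stream; a minimal sketch of that property, reusing `test_space` and `seed_dict` from the test for brevity:

test_space.seed(seed_dict)
s1 = test_space.sample()
test_space.seed(seed_dict)
s2 = test_space.sample()
# Reseeding with the identical seed dict replays the same samples.
assert (s1["a"] == s2["a"]).all()
assert (s1["b"]["b_1"] == s2["b"]["b_1"]).all()
assert s1["c"] == s2["c"]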
Code Example #3
File: mouse_env.py  Project: jaentrouble/mouse_test_2
import gym
import numpy as np
from gym.spaces import Box, Dict, Discrete
from gym.utils import seeding
# Engine, ec, and rng are project-local modules from jaentrouble/mouse_test_2;
# their import lines are omitted in this snippet.


class MouseEnv(gym.Env):
    metadata = {
        'render.modes': ['human', 'rgb']
    }
    def __init__(self):
        # Turn left 45°, Move forward, Turn right 45°
        self.action_space = Discrete(3)
        self._done = False
        self.viewer = None
        self.engine = None
        self.max_step = 100
        self.cur_step = 0
        self.image_size = (720, 720)
        self.seed()

        # 3 Continuous Inputs from both eyes
        self.observation_space = Dict({
            'Right': Box(0, 255, shape=(100, ec.CacheNum, 3), dtype=np.uint8),
            'Left': Box(0, 255, shape=(100, ec.CacheNum, 3), dtype=np.uint8)
        })

    def step(self, action):
        assert not (self.engine is None), 'Reset first before starting env'
        if self._done:
            print('The game is already done. Continuing may cause unexpected'
                  ' behaviors')
        if action == 0:
            trans_action = ((0, 0), np.pi / 4)
        elif action == 1:
            trans_action = ((10, 0), 0)
        elif action == 2:
            trans_action = ((0, 0), -np.pi / 4)
        else:
            raise ValueError(f'Invalid action: {action}')
        observation, reward, done, info = self.engine.update(trans_action)
        if done:
            self._done = True
        
        #Check if reached max_step
        self.cur_step += 1
        if self.cur_step >= self.max_step:
            self._done = True
            done = True

        return observation, reward, done, info

    def reset(self):
        """
        Reset the environment and return initial observation
        """
        self._done = False
        self.cur_step = 0
        self.engine = self._new_engine()
        initial_observation = self.engine.initial_observation()
        return initial_observation

    def render(self, mode='human'):
        assert not (self.engine is None), 'Reset first before starting env'
        if 'human' in mode:
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer(maxwidth=720)
            self.viewer.imshow(self.engine.image)
        elif 'rgb' in mode:
            return self.engine.image

    def seed(self, seed=None):
        np_random, seed = seeding.np_random(seed)
        rng.np_random = np_random  # rng is a project-global module holding the shared RNG
        self.action_space.seed(seed)
        return [seed]  # gym's Env.seed convention: return the list of used seeds

    def close(self):
        if self.viewer:
            self.viewer.close()
            self.viewer = None

    def _new_engine(self):
        return Engine(*self.image_size)
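
A minimal interaction-loop sketch for this environment, assuming the project-local Engine/ec/rng modules are importable:

env = MouseEnv()
obs = env.reset()
done = False
while not done:
    # random policy for illustration: turn left, move forward, or turn right
    obs, reward, done, info = env.step(env.action_space.sample())
env.close()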
Code Example #4
import os
from os.path import relpath

from gym import Env
from gym.spaces import Box, Discrete, MultiDiscrete
from hydra import compose, initialize_config_dir
from hydra.utils import to_absolute_path
from omegaconf import OmegaConf
# Project-local classes (ExpandoGame, GameRenderer, OneHot, OneHotBox) are
# omitted from this snippet's imports.


class Expando(Env):
    """Gym environment wrapping the expando game. For details on the game, check the ExpandoGame class.

    Action-space:
        MultiDiscrete: (move_direction, action_type) with move_direction in {0, ..., 2 * n_axis}, where 0 ... n_axis
        represents movement along an axis in the positive direction, n_axis ... 2 * n_axis in the negative direction,
        and action_type is in {piece_type_0, ..., piece_type_n}, i.e. there is a placement action for each type of
        piece.

        If `multi_discrete_actions` is set to False, a single discrete action space over all items in the Cartesian
        product of move_direction and action_type is used instead, i.e. Discrete(n_move_directions * n_piece_types).
        The order of the action pairs is then the same as returned by `itertools.product()`.

    Observation-space description:
        A Box space where each observation has dimensions (axis_0 x axis_1 x ... x axis_n x n_one_hot x n_scores),
        where axis_k is the length of the k-th axis of the game board grid,
        n_one_hot = 1 + n_players * (n_piece_types - 1) is the dimension of the pieces' one-hot encoding, and
        n_scores = 3 is the number of additional normalized features regarding the player: is_cursor_position,
        room, population. Note that n_one_hot accounts for the empty piece_type, which doesn't belong to any player.

        If `flat_observations` is set to True, the box observations are
        (axis_0 * axis_1 * ... * axis_n * n_one_hot + n_scores)-dimensional vectors, where n_scores = 2 + n_axis,
        since the cursor's position is no longer represented as a bit but as normalized (x, y, ...) coordinates.
    """
    def __init__(self,
                 grid_size: tuple,
                 n_players: int = 2,
                 max_turns=100,
                 final_reward=100,
                 piece_types=None,
                 policies_other=None,
                 observe_all=False,
                 multi_discrete_actions=False,
                 flat_observations=False,
                 render=False,
                 cell_size=50,
                 padding=5,
                 ui_font_size=14,
                 seed=None):
        """

        :param grid_size: tuple specifying the dimensions of the game's board.
        :param n_players: number of players participating in the game.
        :param max_turns: maximum number of turns per episode.
        :param final_reward: amount of final reward given to the winner and taken from the losers.
        :param piece_types: list of dict configs describing the possible pieces.
        :param policies_other: list of policies to use for the opponent players.
        :param observe_all: whether to return observations on `step()` for all players in the info dict or not.
        :param multi_discrete_actions: whether to use a multi-discrete action space.
        :param flat_observations: whether to flatten the observations or return as tensor.
        :param render: enables rendering when calling `render()`.
        :param cell_size: width/height of a cell when rendering.
        :param padding: padding between cells when rendering.
        :param ui_font_size: size of the ui font when rendering.
        :param seed: random seed.
        """
        grid_size = tuple(grid_size)
        if policies_other is not None:
            assert n_players - 1 == len(
                policies_other), 'please provide a policy for each opponent.'

        self.n_players = n_players
        self.policies_other = policies_other
        self.observe_all = observe_all

        if piece_types is None:
            self.piece_types = self._get_default_piece_types()
        else:
            self.piece_types = piece_types
        n_piece_types = len(self.piece_types)

        # actions: (cursor move direction, piece_type)
        # where (cursor move direction) encodes +1 or -1 movement along an axis and 0 for no movement.
        n_move_directions = 1 + 2 * len(grid_size)
        if multi_discrete_actions:
            self.action_space = MultiDiscrete(
                [n_move_directions, n_piece_types])
        else:
            self.action_space = Discrete(n_move_directions * n_piece_types)

        # observation space:
        # (d_0 * ... * d_n * piece_type * player
        # + cursor_d_0 + ... + cursor_d_n + population + room)
        k_cursor_features = len(grid_size) if flat_observations else 1
        obs_dims = grid_size + (1 + (n_piece_types - 1) * n_players, )
        self.observation_space = OneHotBox(OneHot(obs_dims),
                                           Box(0.0,
                                               1.0,
                                               shape=(2 +
                                                      k_cursor_features, )),
                                           flatten=flat_observations)

        self.game = ExpandoGame(grid_size,
                                n_players,
                                max_turns,
                                final_reward=final_reward,
                                piece_types=self.piece_types,
                                seed=seed)
        self.observation_format = 'flat' if flat_observations else 'grid'
        self.do_render = render
        if self.do_render:
            self.renderer = GameRenderer(self.game, cell_size, padding,
                                         ui_font_size)

        self.seed(seed)

    def step(self, action, other_actions=None):
        """Perform each player's turn.

        :param action: action to take as player 0
        :param other_actions: optional list of actions to take for the other players. Will be sampled from
        action_space if not provided.
        :return: obs_0, reward_0, done, info
        """
        if self.policies_other is not None:
            assert other_actions is None, 'other actions are already defined by the policies passed at initialization'

        # other player actions passed as argument
        if other_actions is not None:
            assert len(
                other_actions
            ) + 1 == self.n_players, 'please provide an action for each player'
            rewards_other = [
                self.game.take_turn(action, i)
                for i, action in enumerate(other_actions, start=1)
            ]
        # other player actions defined by policies passed to constructor
        elif self.policies_other is not None:
            other_obs = [
                self.game.get_observation(i, self.observation_format)
                for i in range(1, self.n_players)
            ]
            actions_other = [
                policy.predict(obs)[0][0]
                for obs, policy in zip(other_obs, self.policies_other)
            ]
            rewards_other = [
                self.game.take_turn(a, i)
                for i, a in enumerate(actions_other, start=1)
            ]
        # no other player actions provided: sample
        else:
            rewards_other = [
                self.game.take_turn(self.action_space.sample(), i)
                for i in range(1, self.n_players)
            ]

        info = {}
        if self.observe_all:
            other_obs_new = [
                self.game.get_observation(i, self.observation_format)
                for i in range(1, self.n_players)
            ]
            info = {'rewards_other': rewards_other, 'obs_other': other_obs_new}

        reward_0 = self.game.take_turn(action, player_id=0)
        obs_0 = self.game.get_observation(player_id=0,
                                          formatting=self.observation_format)
        done = self.game.is_done

        if done:
            self.game.reset()

        return obs_0, reward_0, done, info

    def seed(self, seed=None):
        """Set seeds of all random number generators. Note that pseudo random actions are performed at initialization,
        so in order to seed these actions as well you need to pass a seed to the constructor.

        :param seed: seed to set
        """
        self.observation_space.seed(seed)
        self.action_space.seed(seed)
        self.game.seed(seed)

    def reset(self, player_id=0):
        """Reset the environment.

        :param player_id: id of the player to get the first observation from.
        :return: observation of player with player_id or a list of all observations if `observe_all` was set.
        """
        self.game.reset()
        if self.observe_all:
            return [
                self.game.get_observation(i, self.observation_format)
                for i in range(self.n_players)
            ]
        return self.game.get_observation(player_id, self.observation_format)

    def render(self, mode='human'):
        """Render a pyglet visualization. Only works with 2D grids.
        """
        assert len(
            self.game.grid_size
        ) < 3, 'Only 2D grids are supported for rendering at the moment.'
        if self.do_render:
            self.renderer.step()

    @staticmethod
    def from_config(file_path):
        """Load environment using a yaml configuration file or a composable hydra config

        :param file_path: path to the config file
        :return: A configured Expando environment
        """
        file_path = to_absolute_path(file_path)
        conf_dir, file_name = os.path.split(file_path)
        with initialize_config_dir(conf_dir):
            cfg = compose(config_name=file_name)

        env = Expando(**cfg)
        return env

    @staticmethod
    def _get_default_piece_types():
        """Load the default piece types from default_config/

        :return: DictConfig containing piece_types
        """
        this_file_dir = os.path.split(relpath(__file__))[0]
        path = os.path.join(this_file_dir, 'default_config/piece_types.yaml')
        return OmegaConf.load(path).piece_types
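
To make the flattened action encoding concrete, a small sketch (the board size and piece count are chosen arbitrarily) showing how a single Discrete action index maps back to a (move_direction, action_type) pair in `itertools.product()` order:

import itertools

n_axis = 2                           # 2D board
n_move_directions = 1 + 2 * n_axis   # no-op plus one move per direction per axis
n_piece_types = 3

pairs = list(itertools.product(range(n_move_directions), range(n_piece_types)))
assert len(pairs) == n_move_directions * n_piece_types

flat_action = 7                      # a sample Discrete action index
move_direction, action_type = pairs[flat_action]
# pairs[7] == (2, 1): itertools.product varies the last factor fastest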
Code Example #5
    def test_spaces(self):
        experiment_name = "test_spaces"
        module_name = "module"

        logger = ModuleLogger(
            output_path=Path(self.temp_dir.name),
            experiment_name=experiment_name,
            module=module_name,
            step_write_frequency=None,
            episode_write_frequency=None,
        )
        seed = 3

        # Discrete
        space = Discrete(n=3)
        space.seed(seed)
        logger.log_space("Discrete", space.sample())

        # MultiDiscrete
        space = MultiDiscrete(np.array([3, 2]))
        space.seed(seed)
        logger.log_space("MultiDiscrete", space.sample())

        # Dict
        space = Dict({
            "predictiveChangeVarDiscountedAverage": spaces.Box(
                low=-np.inf, high=np.inf, shape=(1,)),
            "predictiveChangeVarUncertainty": spaces.Box(
                low=0, high=np.inf, shape=(1,)),
            "lossVarDiscountedAverage": spaces.Box(
                low=-np.inf, high=np.inf, shape=(1,)),
            "lossVarUncertainty": spaces.Box(low=0, high=np.inf, shape=(1,)),
            "currentLR": spaces.Box(low=0, high=1, shape=(1,)),
            "trainingLoss": spaces.Box(low=0, high=np.inf, shape=(1,)),
            "validationLoss": spaces.Box(low=0, high=np.inf, shape=(1,)),
        })
        space.seed(seed)
        logger.log_space("Dict", space.sample())

        space = Box(np.array([0, 0]), np.array([2, 2]))
        space.seed(seed)
        logger.log_space("Box", space.sample())
        logger.close()

        with open(logger.get_logfile(), "r") as log_file:
            logs = list(map(json.loads, log_file))

        wide = log2dataframe(logs, wide=True)
        long = log2dataframe(logs, drop_columns=None)

        self.assertEqual(len(wide), 1)
        first_row = wide.iloc[0]

        # Discrete
        self.assertTrue(not np.isnan(first_row.Discrete))

        # MultiDiscrete
        self.assertTrue(not np.isnan(first_row.MultiDiscrete_0))
        self.assertTrue(not np.isnan(first_row.MultiDiscrete_1))
        simultaneous_logged = long[(long.name == "MultiDiscrete_0") |
                                   (long.name == "MultiDiscrete_1")]
        self.assertEqual(len(simultaneous_logged.time.unique()), 1)

        # Dict
        expected_columns = [
            "Dict_currentLR_0",
            "Dict_lossVarDiscountedAverage_0",
            "Dict_lossVarUncertainty_0",
            "Dict_predictiveChangeVarDiscountedAverage_0",
            "Dict_predictiveChangeVarUncertainty_0",
            "Dict_trainingLoss_0",
        ]

        for expected_column in expected_columns:
            self.assertTrue(not np.isnan(first_row[expected_column]))

        simultaneous_logged = long[long.name.isin(expected_columns)]
        self.assertEqual(len(simultaneous_logged.time.unique()), 1)

        # Box
        self.assertTrue(not np.isnan(first_row.Box_0))
        self.assertTrue(not np.isnan(first_row.Box_1))

        simultaneous_logged = long[(long.name == "Box_0") |
                                   (long.name == "Box_1")]
        self.assertEqual(len(simultaneous_logged.time.unique()), 1)
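
The suffixed column names checked above come from flattening array-valued samples; a small sketch of why, assuming only standard gym behavior:

from gym.spaces import MultiDiscrete
import numpy as np

space = MultiDiscrete(np.array([3, 2]))
space.seed(3)
sample = space.sample()  # an ndarray with one entry per sub-dimension, e.g. array([2, 1])
# One logged column per entry explains names like MultiDiscrete_0 and MultiDiscrete_1;
# Dict samples likewise yield one column per key and vector entry (e.g. Dict_currentLR_0).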
Code Example #6
import gym
import numpy as np
from gym.spaces import Box, Dict, Discrete
from gym.utils import seeding
# Engine, ec, and rng are project-local modules; their import lines are
# omitted in this snippet.


class MouseEnv_cl(gym.Env):
    """MouseEnv_cl-v2

    This version no longer calculates the mouse-apple distance.
    Instead, it can have multiple apples.
    """
    metadata = {'render.modes': ['human', 'rgb']}

    def __init__(self, **kwargs):
        """
        kwargs
        ------
        apple_num : int
            number of apples in a map. Default is 1
        eat_apple : float
            reward given when apple is eaten. Default is 1.0
        hit_wall : float
            punishment (or reward) given when hitting a wall. Default is 0
        """
        #Turn left 45°, Move forward, Turn right 45°
        self.action_space = Discrete(3)
        self._done = False
        self.viewer = None
        self.engine = None

        kwargs.setdefault('apple_num', 1)
        kwargs.setdefault('eat_apple', 1.0)
        kwargs.setdefault('hit_wall', 0)
        self._options = kwargs

        self.max_step = 1000
        self.cur_step = 0
        self.image_size = (720, 720)
        self.seed()

        # 3 Continuous Inputs from both eyes
        self.observation_space = Dict({
            'Right':
            Box(0, 255, shape=(100, ec.CacheNum, 3), dtype=np.uint8),
            'Left':
            Box(0, 255, shape=(100, ec.CacheNum, 3), dtype=np.uint8)
        })

    def step(self, action):
        assert not (self.engine is None), 'Reset first before starting env'
        if self._done:
            print('The game is already done. Continuing may cause unexpected'
                  ' behaviors')
        if action == 0:
            trans_action = ((0, 0), np.pi / 4)
        elif action == 1:
            trans_action = ((10, 0), 0)
        elif action == 2:
            trans_action = ((0, 0), -np.pi / 4)
        else:
            raise ValueError(f'Invalid action: {action}')
        observation, reward, done, info = self.engine.update(trans_action)
        if done:
            self._done = True

        #Check if reached max_step
        self.cur_step += 1
        if self.cur_step >= self.max_step:
            self._done = True
            done = True

        return observation, reward, done, info

    def reset(self):
        """
        Reset the environment and return initial observation
        """
        self._done = False
        self.cur_step = 0
        self.engine = self._new_engine()
        initial_observation = self.engine.initial_observation()
        return initial_observation

    def render(self, mode='human'):
        assert not (self.engine is None), 'Reset first before starting env'
        if 'human' in mode:
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer(maxwidth=720)
            self.viewer.imshow(self.engine.image)
        elif 'rgb' in mode:
            return self.engine.image

    def seed(self, seed=None):
        np_random, seed = seeding.np_random(seed)
        rng.np_random = np_random  # rng is a project-global module holding the shared RNG
        self.action_space.seed(seed)
        return [seed]  # gym's Env.seed convention: return the list of used seeds

    def close(self):
        if self.viewer:
            self.viewer.close()
            self.viewer = None

    def _new_engine(self):
        return Engine(self.image_size, **self._options)
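
A usage sketch for the reward options; the keyword values are illustrative only and, as above, the project-local Engine modules must be importable:

env = MouseEnv_cl(apple_num=3, eat_apple=1.0, hit_wall=-0.1)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()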