def __init__(self, env_config):
    self.state = None
    self.agent_1 = 0
    self.agent_2 = 1
    # MADDPG emits action logits instead of actual discrete actions
    self.actions_are_logits = env_config.get("actions_are_logits", False)
    self.one_hot_state_encoding = env_config.get("one_hot_state_encoding", False)
    self.with_state = env_config.get("separate_state_space", False)

    if not self.one_hot_state_encoding:
        self.observation_space = Discrete(6)
        self.with_state = False
    else:
        # Each agent gets the full state (one-hot encoding of which of the
        # three states is active) as input, with the receiving agent's
        # ID (1 or 2) concatenated onto the end.
        if self.with_state:
            self.observation_space = Dict({
                "obs": MultiDiscrete([2, 2, 2, 3]),
                ENV_STATE: MultiDiscrete([2, 2, 2])
            })
        else:
            self.observation_space = MultiDiscrete([2, 2, 2, 3])
def __init__(self, start, goal):
    """Initializes the environment.

    Args:
        start (float[4]): [x, y, v_xi, v_yi]
        goal (float[4]): [x, y, v_xf, v_yf]
        obstacles (height (int) x width (int)): square matrix with 0
            indicating empty and 1 indicating an obstacle

    Returns:
        Initialized object
    """
    self.start = start
    self.goal = goal
    self.space = DoubleIntegratorVisualizer()
    self.visualizer = PlanVisualizationProgram(
        self.space.get_planning_problem(self.start, self.goal),
        "ao-rrt", "data/ao-rrt")
    self.visualizer.width = self.visualizer.height = 640
    glutInit([])
    self.visualizer.initWindow()
    self.current_state = start
    self.V = [start]  # Vertices of the graph
    self.E = []  # Edges of the graph
    self.N = 0
    self.new = False
    self.obstacles = None  # TODO
    # Action space is time, u_y, and u_x. Acceleration can be positive or negative.
    self.action_space = Box(np.array([1.0, -1.0, -1.0]), np.array([10.0, 1.0, 1.0]))
    # Observation space is width x height, current coordinates, goal coordinates, v_x, v_y
    self.observation_space = Tuple(
        (MultiDiscrete([60, 60]), MultiDiscrete([60, 60]),
         MultiDiscrete([60, 60]),
         Box(np.array([-25, -25]), np.array([25, 25]))))
def observation_space(self):
    if self.action_space_type == ActionSpaceType.DIFFERENTIAL:
        # The third dimension is the number of possible directions:
        # north, south, west, east.
        return MultiDiscrete(
            (self.columns, self.rows, Direction.NB_DIRECTIONS))
    else:
        return MultiDiscrete((self.columns, self.rows))
def __init__(self):
    self.plant_deck = {
        "sunflower": Sunflower,
        "peashooter": Peashooter,
        "wall-nut": Wallnut
    }
    self.action_space = Discrete(
        len(self.plant_deck) * config.N_LANES * config.LANE_LENGTH + 1)
    # self.action_space = MultiDiscrete([len(self.plant_deck), config.N_LANES, config.LANE_LENGTH])  # plant, lane, pos
    self.observation_space = Tuple([
        MultiDiscrete([len(self.plant_deck) + 1] *
                      (config.N_LANES * config.LANE_LENGTH)),
        MultiDiscrete([MAX_ZOMBIE_PER_CELL + 1] *
                      (config.N_LANES * config.LANE_LENGTH)),
        Discrete(MAX_SUN),
        MultiBinary(len(self.plant_deck))
    ])  # Observation: plant type per cell, zombie count per cell, current sun, which plants are available
    self._plant_names = [plant_name for plant_name in self.plant_deck]
    self._plant_classes = [
        self.plant_deck[plant_name].__name__ for plant_name in self.plant_deck
    ]
    self._plant_no = {
        self._plant_classes[i]: i for i in range(len(self._plant_names))
    }
    self._scene = Scene(self.plant_deck, BasicZombieSpawner())
    self._reward = 0
def build_agent_spaces(self) -> Tuple[Space, Space]:
    """Construct the action and observation spaces.

    Description of actions and observations:
    https://github.com/google-research/football/blob/master/gfootball/doc/observation.md
    """  # noqa: E501
    action_space = Discrete(19)
    # The football field's corners are [+-1., +-0.42]. However, the players
    # and balls may get out of the field. Thus we multiply those limits by
    # a factor of 2.
    xlim = 1. * 2
    ylim = 0.42 * 2
    num_players: int = 11
    xy_space = Box(
        np.array([-xlim, -ylim], dtype=np.float32),
        np.array([xlim, ylim], dtype=np.float32))
    xyz_space = Box(
        np.array([-xlim, -ylim, 0], dtype=np.float32),
        np.array([xlim, ylim, np.inf], dtype=np.float32))
    observation_space = DictSpace({
        "controlled_players": Discrete(2),
        "players_raw": TupleSpace([
            DictSpace({
                # ball information
                "ball": xyz_space,
                "ball_direction": Box(-np.inf, np.inf, (3, )),
                "ball_rotation": Box(-np.inf, np.inf, (3, )),
                "ball_owned_team": Discrete(3),
                "ball_owned_player": Discrete(num_players + 1),
                # left team
                "left_team": TupleSpace([xy_space] * num_players),
                "left_team_direction": TupleSpace([xy_space] * num_players),
                "left_team_tired_factor": Box(0., 1., (num_players, )),
                "left_team_yellow_card": MultiBinary(num_players),
                "left_team_active": MultiBinary(num_players),
                "left_team_roles": MultiDiscrete([10] * num_players),
                # right team
                "right_team": TupleSpace([xy_space] * num_players),
                "right_team_direction": TupleSpace([xy_space] * num_players),
                "right_team_tired_factor": Box(0., 1., (num_players, )),
                "right_team_yellow_card": MultiBinary(num_players),
                "right_team_active": MultiBinary(num_players),
                "right_team_roles": MultiDiscrete([10] * num_players),
                # controlled player information
                "active": Discrete(num_players),
                "designated": Discrete(num_players),
                "sticky_actions": MultiBinary(10),
                # match state
                "score": Box(-np.inf, np.inf, (2, )),
                "steps_left": Box(0, np.inf, (1, )),
                "game_mode": Discrete(7)
            })
        ])
    })
    return action_space, observation_space
class AvailActionsTestEnv(MultiAgentEnv):
    num_actions = 10
    action_space = Discrete(num_actions)
    observation_space = Dict({
        "obs": Dict({
            "test": Dict({
                "a": Discrete(2),
                "b": MultiDiscrete([2, 3, 4])
            }),
            "state": MultiDiscrete([2, 2, 2]),
        }),
        "action_mask": Box(0, 1, (num_actions, )),
    })

    def __init__(self, env_config):
        super().__init__()
        self.state = None
        self.avail = env_config.get("avail_actions", [3])
        self.action_mask = np.array([0] * 10)
        for a in self.avail:
            self.action_mask[a] = 1

    def reset(self):
        self.state = 0
        return {
            "agent_1": {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask,
            },
            "agent_2": {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask,
            },
        }

    def step(self, action_dict):
        if self.state > 0:
            assert (action_dict["agent_1"] in self.avail
                    and action_dict["agent_2"] in self.avail), \
                "Failed to obey available actions mask!"
        self.state += 1
        rewards = {"agent_1": 1, "agent_2": 0.5}
        obs = {
            "agent_1": {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask,
            },
            "agent_2": {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask,
            },
        }
        dones = {"__all__": self.state >= 20}
        return obs, rewards, dones, {}
def test_encoder_with_sampling(space):
    """Test space_encoder with sampling."""
    NUM_SAMPLES = int(np.prod(space))
    x = MultiDiscrete(space)
    e = Encoder(x)
    for _ in range(NUM_SAMPLES):
        i = x.sample()
        enc = e.encode(i)
        dec = e.decode(enc)
        assert np.equal(i, dec).all()
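# A minimal sketch of an Encoder that would satisfy the round-trip test above,
# assuming the intent is to map MultiDiscrete samples to flat integer indices.
# The ravel/unravel implementation is an assumption for illustration, not
# necessarily the project's actual Encoder.
import numpy as np
from gym.spaces import MultiDiscrete


class Encoder:
    def __init__(self, space: MultiDiscrete):
        self.nvec = space.nvec

    def encode(self, sample):
        # Map a vector of sub-values to one flat index in [0, prod(nvec)).
        return int(np.ravel_multi_index(sample, self.nvec))

    def decode(self, index):
        # Inverse mapping: flat index back to the vector of sub-values.
        return np.array(np.unravel_index(index, self.nvec))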
def __init__(self):
    self.action_space = MultiDiscrete([2, 3, 4])
    self.observation_space = MultiDiscrete([4, 5])
    self.current_step = 0
    self._valid_actions1 = torch.ones(self.action_space.nvec[0])
    self._valid_actions2 = torch.ones(self.action_space.nvec[0],
                                      self.action_space.nvec[1])
    self._valid_actions3 = torch.ones(self.action_space.nvec[0],
                                      self.action_space.nvec[1],
                                      self.action_space.nvec[2])
    self._action_mask = [
        self._valid_actions1, self._valid_actions2, self._valid_actions3
    ]
def __init__(self, env_config):
    self.env_config = env_config
    self.reference_world = ws.WorldBuilder.create()
    self.current_iteration = 0
    self.n_iterations = 0
    self.product_ids = self._product_ids()
    self.max_sources_per_facility = 0
    self.max_fleet_size = 0
    self.facility_types = {}
    facility_class_id = 0
    for f in self.reference_world.facilities.values():
        if f.consumer is not None:
            sources_num = len(f.consumer.sources)
            if sources_num > self.max_sources_per_facility:
                self.max_sources_per_facility = sources_num
        if f.distribution is not None:
            if len(f.distribution.fleet) > self.max_fleet_size:
                self.max_fleet_size = len(f.distribution.fleet)
        facility_class = f.__class__.__name__
        if facility_class not in self.facility_types:
            self.facility_types[facility_class] = facility_class_id
            facility_class_id += 1

    self.state_calculator = StateCalculator(self)
    self.reward_calculator = RewardCalculator(env_config)
    self.action_calculator = ActionCalculator(self)

    self.action_space_producer = MultiDiscrete([
        8,  # unit price
        6,  # production rate level
    ])
    self.action_space_consumer = MultiDiscrete([
        self.n_products(),  # consumer product id
        self.max_sources_per_facility,  # consumer source id
        6  # consumer quantity
    ])

    example_state, _ = self.state_calculator.world_to_state(self.reference_world)
    state_dim = len(list(example_state.values())[0])
    self.observation_space = Box(low=0.00, high=1.00, shape=(state_dim, ),
                                 dtype=np.float64)
def __init__(self, env, stack_size=4):
    """Wrapper that returns stacks of the last n timesteps.

    Args:
        stack_size: number of observations to be stacked and returned
    """
    super().__init__(env)
    # Older observations will have a lower index in the buffer.
    self._buffer = None
    self._stack_size = stack_size
    old_space_screen = env.observation_space[0]
    old_space_movmnt = env.observation_space[1]
    # The new screen observation space is just the previous one, repeated
    # stack_size times along a new leading axis.
    new_space_screen = Box(
        old_space_screen.low.reshape(
            -1, *old_space_screen.low.shape).repeat(stack_size, axis=0),
        old_space_screen.high.reshape(
            -1, *old_space_screen.high.shape).repeat(stack_size, axis=0),
        dtype=old_space_screen.dtype)
    # The new position space is the previous Discrete space, repeated.
    new_space_movmnt = MultiDiscrete(
        [old_space_movmnt.n for _ in range(stack_size)])
    # Observations will be tuples of (stack of screens, stack of positions).
    self.observation_space = Tuple([new_space_screen, new_space_movmnt])
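# A minimal sketch of the observation() hook such a frame-stacking wrapper
# could pair with the spaces defined above (intended as a method of the
# wrapper). The lazy buffer initialization and the stacking layout are
# assumptions for illustration, not the original implementation; the buffer
# is also not cleared between episodes here.
import numpy as np


def observation(self, obs):
    screen, movmnt = obs
    if self._buffer is None:
        # First observation: fill the whole stack with copies of it.
        self._buffer = ([screen] * self._stack_size,
                        [movmnt] * self._stack_size)
    else:
        screens, movmnts = self._buffer
        # Older observations keep the lower indices: drop the oldest, append the newest.
        self._buffer = (screens[1:] + [screen], movmnts[1:] + [movmnt])
    screens, movmnts = self._buffer
    return np.stack(screens, axis=0), np.array(movmnts)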
def __init__(self, **smac_args):
    """Create a new multi-agent StarCraft env compatible with RLlib.

    Arguments:
        smac_args (dict): Arguments to pass to the underlying
            smac.env.starcraft.StarCraft2Env instance.

    Examples:
        >>> from smac_rllib import RLlibStarCraft2Env
        >>> env = RLlibStarCraft2Env(map_name="8m")
        >>> print(env.reset())
    """
    self._env = StarCraft2Env(**smac_args)
    self.horizon = self._env.episode_limit
    self.nbr_agents = self._env.n_agents
    self._ready_agents = []

    self.observation_space = Dict({
        "obs": Box(-1, 1, shape=(self.nbr_agents, self._env.get_obs_size(),)),
        "avail_actions": Box(0, 1, shape=(self.nbr_agents,
                                          self._env.get_total_actions(),)),
        "state": Box(-float('inf'), float('inf'),
                     shape=(self._env.get_state_size(),)),
        "battle_won": Box(0, 1, shape=(1,), dtype=np.bool_),
        "dead_allies": Box(0, self.nbr_agents, shape=(1,), dtype=np.int64),
        "dead_enemies": Box(0, int(1e3), shape=(1,), dtype=np.int64)
    })
    self.action_space = MultiDiscrete([self._env.get_total_actions()] * self.nbr_agents)
def __init__(self, env, body_names, radius_multiplier=1.5,
             agent_idx_allowed_to_lock=None, lock_type="any_lock",
             ac_obs_prefix='', obj_in_game_metadata_keys=None,
             agent_allowed_to_lock_keys=None):
    super().__init__(env)
    self.n_agents = self.unwrapped.n_agents
    self.n_obj = len(body_names)
    self.body_names = body_names
    self.agent_idx_allowed_to_lock = np.arange(self.n_agents) \
        if agent_idx_allowed_to_lock is None else agent_idx_allowed_to_lock
    self.lock_type = lock_type
    self.ac_obs_prefix = ac_obs_prefix
    self.obj_in_game_metadata_keys = obj_in_game_metadata_keys
    self.agent_allowed_to_lock_keys = agent_allowed_to_lock_keys
    self.action_space.spaces[f'action_{ac_obs_prefix}glue'] = Tuple(
        [MultiDiscrete([2] * self.n_obj) for _ in range(self.n_agents)])
    self.observation_space = update_obs_space(
        env, {
            f'{ac_obs_prefix}obj_lock': (self.n_obj, 1),
            f'{ac_obs_prefix}you_lock': (self.n_agents, self.n_obj, 1),
            f'{ac_obs_prefix}team_lock': (self.n_agents, self.n_obj, 1)
        })
    self.lock_radius = radius_multiplier * self.metadata['box_size']
    self.obj_locked = np.zeros((self.n_obj, ), dtype=int)
def __init__(self, x_dim=5, y_dim=5, **kwargs):
    self.x_dim = x_dim
    self.y_dim = y_dim
    self.num_states = x_dim * y_dim
    # Right, Up, Left, Down, Grab
    self.action_space = Discrete(5)
    self.observation_space = Dict(
        dict(
            desired_goal=Discrete(self.num_states),  # goal position
            achieved_goal=Discrete(self.num_states),  # block position
            observation=MultiDiscrete([self.num_states, 2])  # arm position, object in air
        ))
    self._location_space = Discrete(self.num_states)
    self._goal_location = self._location_space.sample()
    self._block_location = self._location_space.sample()
    self._arm_location = self._location_space.sample()
    self._picked_up_block = False
    self.action_handlers = [
        self._move_function(lambda s: s - 1,
                            lambda s: s % self.x_dim == 0),  # right
        self._move_function(lambda s: s - self.x_dim,
                            lambda s: s < self.x_dim),  # up
        self._move_function(lambda s: s + 1,
                            lambda s: (s + 1) % self.x_dim == 0),  # left
        self._move_function(
            lambda s: s + self.x_dim,
            lambda s: s + self.x_dim >= self.x_dim * self.y_dim),  # down
        self._grab
    ]
def __init__(self, init_space, nb_bins):
    if not isinstance(init_space, Box):
        raise RuntimeError(
            "Impossible to convert a gym space of type {} to a discrete space"
            " (it should be of type space.Box)"
            "".format(type(init_space)))
    if nb_bins < 2:
        raise RuntimeError(
            "This does not work with fewer than 2 bins (if you want to ignore some parts "
            "of the action_space or observation_space please use "
            "\"gym_space.ignore_attr\" or \"gym_space.keep_only_attr\")")
    min_ = init_space.low
    max_ = init_space.high
    self._ignored = min_ == max_  # which components are ignored
    self._res = min_
    self._values = np.linspace(min_, max_, num=nb_bins + 2)
    # the values that will be used when converting gym to glop
    self._values = self._values[1:-1, :]
    # TODO there might be a cleaner approach here
    self._bins_size = np.linspace(min_, max_, num=2 * nb_bins + 1)
    self._bins_size = self._bins_size[2:-1:2, :]  # the values defining the "cuts"
    self._gen_idx = np.arange(self._bins_size.shape[-1])
    n_bins = np.ones(min_.shape[0]) * nb_bins
    # if min and max are equal, we don't want multiple values for that variable
    n_bins[self._ignored] = 1
    BaseGymAttrConverter.__init__(
        self,
        space=MultiDiscrete(n_bins),
    )
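# A small worked example of the binning arithmetic above, with hypothetical
# scalar bounds instead of the converter's per-component arrays: with low=0,
# high=1 and nb_bins=4, the kept "representative" values are the interior
# points of an (nb_bins + 2)-point linspace, and the "cuts" are every second
# interior point of a (2 * nb_bins + 1)-point linspace. The digitize call at
# the end is only an assumption about how a reading would be mapped to a bin.
import numpy as np

nb_bins = 4
low, high = 0.0, 1.0
values = np.linspace(low, high, num=nb_bins + 2)[1:-1]      # [0.2, 0.4, 0.6, 0.8]
cuts = np.linspace(low, high, num=2 * nb_bins + 1)[2:-1:2]  # [0.25, 0.5, 0.75]

x = 0.63
bin_index = int(np.digitize(x, cuts))  # -> 2, i.e. the bin represented by 0.6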
def __init__(self):
    EzPickle.__init__(self)
    self.seed()
    self.viewer = None
    self.world = Box2D.b2World()
    self.terrain = None
    self.hull = None
    self.prev_shaping = None
    self.fd_polygon = fixtureDef(
        shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]),
        friction=FRICTION)
    self.fd_edge = fixtureDef(
        shape=edgeShape(vertices=[(0, 0), (1, 1)]),
        friction=FRICTION,
        categoryBits=0x0001,
    )
    high = np.array([np.inf] * 28)
    # self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
    self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32)
    self.action_space = MultiDiscrete([3, 21, 21, 21, 21])
    # self.observation_shape = (24,)
    # self.observation_space = gym.spaces.Box(low=-high, high=high, shape=self.observation_shape, dtype=np.float32)
    self.valid_actions = []
    self.state_machine = None
    self.reset()
    self.terminal = False
    self.counter = 0
def __init__(self, env, num_pos_buckets, num_speed_buckets):
    super().__init__(env)
    self.observation_space = MultiDiscrete(
        [num_pos_buckets, num_speed_buckets])
    # Bucket edges over MountainCar's ranges: position in [-1.2, 0.6],
    # velocity in [-0.07, 0.07].
    self.pos_buckets = np.linspace(-1.2, 0.6, num_pos_buckets)
    self.speed_buckets = np.linspace(-0.07, 0.07, num_speed_buckets)
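# A minimal sketch of the observation() method such a discretizing wrapper
# would need (intended as a method of the wrapper above). Using np.digitize
# against the linspace edges is an assumption for illustration, not
# necessarily the original implementation.
import numpy as np


def observation(self, obs):
    pos, speed = obs
    # digitize returns an index in 1..len(bins); shift and clip into
    # [0, num_buckets - 1] so the result is a valid sample of
    # MultiDiscrete([num_pos_buckets, num_speed_buckets]).
    pos_idx = np.clip(np.digitize(pos, self.pos_buckets) - 1,
                      0, len(self.pos_buckets) - 1)
    speed_idx = np.clip(np.digitize(speed, self.speed_buckets) - 1,
                        0, len(self.speed_buckets) - 1)
    return np.array([pos_idx, speed_idx])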
def make_obs_space(embed_dim=768, max_steps=None, max_utterances=5,
                   max_command_length=5, max_variables=10, max_actions=10,
                   **kwargs):
    true_obs = {
        'dialog_history': Repeated(
            Dict({
                'sender': Discrete(3),
                'utterance': Box(-10, 10, shape=(embed_dim, ))
            }),
            max_len=max_utterances),
        'partial_command': Repeated(
            Box(-10, 10, shape=(embed_dim, )), max_len=max_command_length),
        'variables': Repeated(
            Box(-10, 10, shape=(embed_dim, )), max_len=max_variables),
    }
    if max_steps:
        true_obs['steps'] = Discrete(max_steps)
    # return Dict(true_obs)  # for calculating true_obs_shape
    return Dict({
        "true_obs": Dict(true_obs),
        '_action_mask': MultiDiscrete([2 for _ in range(max_actions)]),
        '_action_embeds': Box(-10, 10, shape=(max_actions, embed_dim)),
    })
def __init__(self, config=None):
    self.s = 9
    self.action_space = Discrete(self.s)
    self.observation_space = MultiDiscrete([3] * self.s)
    self.agents = ["X", "O"]
    self.empty = " "
    self.t, self.state, self.rewards_to_send = self._reset()
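# A minimal sketch of how a 3x3 board could be encoded into the
# MultiDiscrete([3] * 9) observation above, assuming 0 = empty, 1 = "X",
# 2 = "O" (intended as a method of the env). The mapping is illustrative,
# not necessarily the original one.
import numpy as np


def _board_to_obs(self, board):
    # `board` is a length-9 sequence of cells containing self.empty, "X" or "O".
    mapping = {self.empty: 0, "X": 1, "O": 2}
    return np.array([mapping[cell] for cell in board], dtype=np.int64)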
def test_preprocessing_disabled(self):
    config = ppo.DEFAULT_CONFIG.copy()
    config["env"] = "ray.rllib.examples.env.random_env.RandomEnv"
    config["env_config"] = {
        "config": {
            "observation_space": Dict({
                "a": Discrete(5),
                "b": Dict({
                    "ba": Discrete(4),
                    "bb": Box(-1.0, 1.0, (2, 3), dtype=np.float32)
                }),
                "c": Tuple((MultiDiscrete([2, 3]), Discrete(1))),
                "d": Box(-1.0, 1.0, (1, ), dtype=np.int32),
            }),
        },
    }
    # Set this to True to enforce no preprocessors being used.
    # Complex observations now arrive directly in the model as
    # structures of batches, e.g. {"a": tensor, "b": [tensor, tensor]}
    # for obs-space=Dict(a=..., b=Tuple(..., ...)).
    config["_disable_preprocessor_api"] = True

    num_iterations = 1
    # Only supported for tf so far.
    for _ in framework_iterator(config):
        trainer = ppo.PPOTrainer(config=config)
        for i in range(num_iterations):
            results = trainer.train()
            check_train_results(results)
            print(results)
        check_compute_single_action(trainer)
        trainer.stop()
def __init__(self, G_const=1.0, acceleration=30.0, time_step=0.01,
             time_limit=10, friction=10.0, seed=None, boundary_less=-1,
             boundary_greater=1, num_agents=3):
    ''' constants '''
    self.G_const = G_const
    self.acceleration = acceleration
    self.time_step = time_step
    self.time_limit = time_limit
    self.friction = friction
    if seed is None:
        self.seed = int(time.time())
    else:
        self.seed = seed
    self.boundary_less = boundary_less
    self.boundary_greater = boundary_greater
    self.num_agents = num_agents
    # Note: this uses the legacy gym MultiDiscrete signature ([min, max] per
    # dimension); newer gym versions expect a flat list of category counts.
    self.action_space = MultiDiscrete([[0, 8] for _ in range(num_agents)])
    # It's unclear what low and high here should be. Set them to 0 so
    # that if anyone tries to use them, it is more likely that obviously
    # wrong things happen.
    self.observation_space = Box(low=0, high=0, shape=(4 * (num_agents + 1),))
    ''' variables that change with time '''
    self.state = State(num_agents, seed)
    self.spec = None
    self.viewer = None
def __init__(self):
    #####
    ##### Machine Teaching
    self.action_space = MultiDiscrete([21, 6, 4])
    self.observation_shape = (1, 33, 33)
    self.observation_space = gym.spaces.Box(low=0, high=1,
                                            shape=self.observation_shape,
                                            dtype=np.float16)
    self.counter = 0
    self.valid_actions1 = [1] * 21
    self.valid_actions2 = []
    for action in self.valid_actions1:
        self.valid_actions2.append([1] * 6)
    self.valid_actions3 = []
    for i in range(21):
        tmp = []
        for j in range(6):
            tmp.append([1] * 4)
        self.valid_actions3.append(tmp)
    self.valid_actions = [
        self.valid_actions1, self.valid_actions2, self.valid_actions3
    ]
    print('finished init')
def test_flatten_discrete():
    md = MultiDiscrete([3, 4])
    trafo = flatten(md)
    assert trafo.target == Discrete(12)

    # check that we get all actions exactly once
    actions = []
    for (i, j) in itertools.product([0, 1, 2], [0, 1, 2, 3]):
        actions += [(i, j)]
    for i in range(0, 12):
        a = trafo.convert_from(i)
        assert a in actions, (a, actions)
        assert trafo.convert_to(a) == i
        actions = list(filter(lambda x: x != a, list(actions)))
    assert len(actions) == 0

    # same test for binary
    md = MultiBinary(3)
    trafo = flatten(md)
    assert trafo.target == Discrete(2**3)

    # check that we get all actions exactly once
    actions = []
    for (i, j, k) in itertools.product([0, 1], [0, 1], [0, 1]):
        actions += [(i, j, k)]
    for i in range(0, 8):
        a = trafo.convert_from(i)
        assert trafo.convert_to(a) == i
        assert a in actions, (a, actions)
        actions = list(filter(lambda x: x != a, actions))
    assert len(actions) == 0

    # check support for numpy array and list
    assert trafo.convert_to((1, 0, 1)) == trafo.convert_to(np.array([1, 0, 1]))
    assert trafo.convert_to((1, 0, 1)) == trafo.convert_to([1, 0, 1])
def __init__(self):
    self.action_space = MultiDiscrete([len(action_set_list)] * n_agents)
    low = np.array([-inf] * (len_action_list * 2 + (6 * n_agents)))
    high = np.array([inf] * (len_action_list * 2 + (6 * n_agents)))
    self.observation_space = Box(low, high, dtype=np.float32, shape=None)
    self.curr_episode = 0
    self.seed()
def __init__(self, env, eat_thresh=0.5, max_food_health=10,
             respawn_time=np.inf, food_rew_type='selfish', reward_scale=1.0,
             reward_scale_obs=False):
    super().__init__(env)
    self.eat_thresh = eat_thresh
    self.max_food_health = max_food_health
    self.respawn_time = respawn_time
    self.food_rew_type = food_rew_type
    self.n_agents = self.metadata['n_agents']

    if type(reward_scale) not in [list, tuple, np.ndarray]:
        reward_scale = [reward_scale, reward_scale]
    self.reward_scale = reward_scale
    self.reward_scale_obs = reward_scale_obs

    # Reset obs/action space to match
    self.max_n_food = self.metadata['max_n_food']
    self.curr_n_food = self.metadata['curr_n_food']
    self.max_food_size = self.metadata['food_size']
    food_dim = 5 if self.reward_scale_obs else 4
    self.observation_space = update_obs_space(
        self.env, {
            'food_obs': (self.max_n_food, food_dim),
            'food_health': (self.max_n_food, 1),
            'food_eat': (self.max_n_food, 1)
        })
    self.action_space.spaces['action_eat_food'] = Tuple([
        MultiDiscrete([2] * self.max_n_food) for _ in range(self.n_agents)
    ])
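# For reference, a standalone example of what a sample from a per-agent
# Tuple-of-MultiDiscrete action space like 'action_eat_food' above looks
# like. The sizes (2 agents, 3 food items) are hypothetical, and the
# "agent i eats food j" reading is an assumption based on the wrapper name.
from gym.spaces import MultiDiscrete, Tuple

eat_space = Tuple([MultiDiscrete([2] * 3) for _ in range(2)])
sample = eat_space.sample()  # e.g. (array([0, 1, 0]), array([1, 0, 0]))
# Presumably agent i attempts to eat food j when sample[i][j] == 1.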
def __init__(self):
    self.seed()
    # No rotation yet
    self.action_space = MultiDiscrete([OBJ_COUNT, GRID_SIZE, GRID_SIZE])
    # State space: TODO
    self.observation_space = None
def __init__(self, env_config, seed=42):
    # Set seed
    np.random.seed(seed)
    self.mg = env_config['building']
    self.Na = 2 + self.mg.architecture['grid'] * 3 + \
        self.mg.architecture['genset'] * 1
    if self.mg.architecture['grid'] == 1 and self.mg.architecture['genset'] == 1:
        self.Na += 1
    self.action_space = Discrete(self.Na)
    self.Ns = 2  # net_load and soc
    dim1 = int(self.mg.parameters['PV_rated_power'] + self.mg.parameters['load'])
    dim2 = 100
    self.observation_space = MultiDiscrete([dim1, dim2])
    self.metadata = {"render.modes": ["human"]}
    self.state, self.reward, self.done, self.info, self.round = \
        None, None, None, None, None
    self.round = None

    # Start the first round
    self.seed()
    self.reset()
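# A minimal sketch of how a continuous (net_load, soc) reading could be
# mapped onto the MultiDiscrete([dim1, dim2]) observation above (intended
# as a method of the env). Rounding the net load to an integer and turning
# the state of charge into a percentage are assumptions for illustration,
# not the environment's actual code.
import numpy as np


def _discretize_state(self, net_load, soc):
    dim1, dim2 = self.observation_space.nvec
    # Net load clipped to [0, dim1 - 1]; soc (a fraction in [0, 1]) becomes
    # an integer percentage in [0, dim2 - 1].
    net_load_idx = int(np.clip(round(net_load), 0, dim1 - 1))
    soc_idx = int(np.clip(round(soc * 100), 0, dim2 - 1))
    return np.array([net_load_idx, soc_idx])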
def __init__(
        self,
        input_shape: Tuple[int, ...],
        trading_fraction: int = 10,
        trading_assets: int = 1,  # later we want the bot to trade one of multiple possible assets
        allow_short: bool = False,
        stop_if_lost: float = None,
        initial_capital: float = 100000,
        commission=lambda size: 0.025):
    super().__init__(
        MultiDiscrete([trading_assets, trading_fraction])
        if trading_assets > 1 else Discrete(trading_fraction + 1),
        Box(low=-1, high=1, shape=input_shape)
    )  # FIXME what shape? we also need historic trades?

    self.trading_fraction = trading_fraction
    self.initial_capital = initial_capital
    self.commission = commission
    self.stop_if_lost = stop_if_lost
    self.allow_short = allow_short

    if allow_short and (trading_fraction % 2) != 0:
        _log.warning('short trades expect an even number of trading fractions')

    # eventually do not serialize ..
    self.trade_log = StreamingTransactionLog()
    self.current_net = 0
def __init__(self, config, vst_config):
    super(VSTEnv, self).__init__()
    self.config = config
    self.vst_config = vst_config
    self.num_knobs = len(vst_config['rnd'])
    self.num_audio_samples = int(config['sampleRate'] * config['renderLength'])
    # Keep audio samples divisible by fftSize
    self.num_audio_samples = self.num_audio_samples - (
        self.num_audio_samples % config['fftSize'])
    self.num_freq = int(1 + (config['fftSize'] / 2.0))
    self.num_mfcc = 20
    # self.num_windows = int((self.num_audio_samples / config['fftSize'] - 1.0) * (config['fftSize'] / config['hopSize']) + 1.0)
    self.num_windows = int((self.num_audio_samples / config['fftSize']) *
                           (config['fftSize'] / config['hopSize']) + 1.0)

    # Mapping from action index (0, 1, ..., num_knobs - 1) to VST parameter
    self.action_to_param = list(vst_config['rnd'].keys())
    self.action_space = MultiDiscrete([self.num_knobs, 4])

    # self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(self.num_freq, self.num_windows,))
    self.observation_space = spaces.Box(low=-1.0, high=1.0,
                                        shape=(self.num_mfcc, self.num_windows))
    # self.observation_space = spaces.Box(low=0, high=255, shape=(self.num_freq, self.num_windows, 1))

    # Create VST engine and generator
    self.engine = rm.RenderEngine(config['sampleRate'], config['bufferSize'],
                                  config['fftSize'])
    self.engine.load_plugin(vst_config['vstPath'])
    self.generator = rm.PatchGenerator(self.engine)
def initialize_space(self, init_space):
    if not isinstance(init_space, Box):
        raise RuntimeError(
            "Impossible to convert a gym space of type {} to a discrete space"
            " (it should be of type space.Box)"
            "".format(type(init_space)))

    min_ = init_space.low
    max_ = init_space.high
    self._ignored = min_ == max_  # which components are ignored
    self._res = min_
    self._values = np.linspace(min_, max_, num=self._nb_bins + 2)
    # the values that will be used when converting gym to glop
    self._values = self._values[1:-1, :]
    # TODO there might be a cleaner approach here
    self._bins_size = np.linspace(min_, max_, num=2 * self._nb_bins + 1)
    self._bins_size = self._bins_size[2:-1:2, :]  # the values defining the "cuts"
    self._gen_idx = np.arange(self._bins_size.shape[-1])
    n_bins = np.ones(min_.shape[0], dtype=dt_int) * dt_int(self._nb_bins)
    # if min and max are equal, we don't want multiple values for that variable
    n_bins[self._ignored] = 1
    space = MultiDiscrete(n_bins)
    self.base_initialize(space=space, g2op_to_gym=None, gym_to_g2op=None)
def __init__(self, size, sleep=0, dict_state=False, recurse_state=False,
             ma_rew=0, multidiscrete_action=False, random_sleep=False):
    assert not (dict_state and recurse_state), \
        "dict_state and recurse_state cannot both be true"
    self.size = size
    self.sleep = sleep
    self.random_sleep = random_sleep
    self.dict_state = dict_state
    self.recurse_state = recurse_state
    self.ma_rew = ma_rew
    self._md_action = multidiscrete_action
    if dict_state:
        self.observation_space = Dict(
            {"index": Box(shape=(1, ), low=0, high=size - 1),
             "rand": Box(shape=(1,), low=0, high=1, dtype=np.float64)})
    elif recurse_state:
        self.observation_space = Dict(
            {"index": Box(shape=(1, ), low=0, high=size - 1),
             "dict": Dict({
                 "tuple": Tuple((Discrete(2),
                                 Box(shape=(2,), low=0, high=1, dtype=np.float64))),
                 "rand": Box(shape=(1, 2), low=0, high=1, dtype=np.float64)})
             })
    else:
        self.observation_space = Box(shape=(1, ), low=0, high=size - 1)
    if multidiscrete_action:
        self.action_space = MultiDiscrete([2, 2])
    else:
        self.action_space = Discrete(2)
    self.done = False
    self.index = 0
    self.seed()