Example #1
    def __init__(self, display=True):
        # Set up bindings so the class can be used as a gym environment
        act_space1 = MultiDiscrete(
            [[0, 4], [0, 2]]
        )  # action space of agent 1: 5 movement actions and 3 communications
        act_space2 = MultiDiscrete(
            [[0, 4], [0, 2]]
        )  # action space of agent 2: 5 movement actions and 3 communications
        self.action_space = [act_space1, act_space2]  # combined action space of both agents
        self.display = display
        # Observation of an agent:
        #   1. Its own x and y velocity (2)
        #   2. Relative distances from landmarks (3*5) and their nature (one-hot encoded), assuming perfect detection
        #       relative distances are only populated once the agent has actually gone close enough to the object;
        #       if the agent has not gone close to the object, a placeholder like [-1,-1,-1] is used
        #       relative distances are arranged in a common order for both agents
        #   3. Target color code for the depth agent (3) / target shape code for the color agent (3) (one-hot encoded)
        #   4. 10 communication channels (10)
        self.observation_space = []
        self.observation_space.append(
            spaces.Box(low=-np.inf,
                       high=+np.inf,
                       shape=(18, ),
                       dtype=np.float32))
        self.observation_space.append(
            spaces.Box(low=-np.inf,
                       high=+np.inf,
                       shape=(18, ),
                       dtype=np.float32))

        self.input_state_size = 18
        self.action_size = 5 + 3
        self.num_comm = 3
        self.n = 2  #2 agents

        self.prev_pos = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
        self.prev_pos_rel = []
        self.prev_rew = [0.0, 0.0]
        self.numcols = 0
        self.numtries = 0

        self.reset_string = "go on"
        #lists storing metrics
        self.m1 = []
        self.M1 = []

        self.m2 = []
        self.M2 = []

        self.R = Render(400, 300, self.display)
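A minimal usage sketch for the environment above, assuming the class is exposed as a hypothetical TwoAgentEnv with the usual gym-style reset/step methods (those names and the MultiDiscrete sample() call are assumptions, not part of the original code):

# Hypothetical driver loop; TwoAgentEnv, reset() and step() are assumed to exist.
# Only the spaces defined in __init__ above are taken as given.
env = TwoAgentEnv(display=False)
obs = env.reset()  # expected: one 18-dim observation vector per agent
for _ in range(10):
    # one [movement, communication] pair per agent, sampled from its MultiDiscrete space
    actions = [space.sample() for space in env.action_space]
    obs, rewards, done, info = env.step(actions)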
Example #2
    def __init__(self, world):

        self.world = world
        self.agents = self.world.agents
        # set required vectorized gym env property
        self.n = len(world.agents)
        # scenario callbacks

        # if true, every agent has the same reward
        self.shared_reward = True
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        for agent in self.agents:
            total_action_space = []
            action_space = spaces.Discrete(world.dim_action)
            total_action_space.append(action_space)
            act_space = MultiDiscrete([[0, act_space.n - 1]
                                       for act_space in total_action_space])
            self.action_space.append(act_space)
            # observation space
            obs_dim = len(world.observation(agent))
            self.observation_space.append(
                spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(obs_dim, ),
                           dtype=np.float32))
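The MultiDiscrete([[low, high], ...]) form used above is the MPE-style helper that takes explicit [low, high] pairs per component. A small sketch, assuming plain gym is available, of the analogous construction with gym's own spaces.MultiDiscrete, which takes the number of choices per component instead:

from gym import spaces

# Two discrete sub-spaces, as built inside the loop above.
total_action_space = [spaces.Discrete(5), spaces.Discrete(3)]

# MPE-style helper: explicit [low, high] bounds per component.
# act_space = MultiDiscrete([[0, s.n - 1] for s in total_action_space])

# gym's built-in MultiDiscrete takes the component sizes directly and
# describes the same joint actions (each component ranges over 0..n-1).
act_space = spaces.MultiDiscrete([s.n for s in total_action_space])
print(act_space.sample())  # e.g. [2 1]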
Example #3
    def __init__(self,
                 world,
                 reset_callback=None,
                 reward_callback=None,
                 observation_callback=None,
                 info_callback=None,
                 done_callback=None):

        self.world = world
        self.agents = self.world.policy_agents
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, every agent has the same reward
        self.shared_reward = world.collaborative if hasattr(
            world, 'collaborative') else False
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if agent.movable:
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
                total_action_space.append(u_action_space)
            # arms action space
            if agent.armed:
                a_action_space = spaces.Discrete(
                    5)  # inop, left, right, trigger, safety
                total_action_space.append(a_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                act_space = MultiDiscrete([[0, act_space.n - 1]
                                           for act_space in total_action_space
                                           ])
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            self.observation_space.append(
                spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(obs_dim, ),
                           dtype=np.float32))
            agent.action.a = np.zeros(2)

        # rendering
        self.viewers = [None]
        self._reset_render()
Example #4
 def configure_spaces(self):
     # configure spaces
     self.action_space = []
     self.observation_space = []
     for agent in self.agents:
         total_action_space = []
         # physical action space
         if self.discrete_action_space:
             u_action_space = spaces.Discrete(self.world.dim_p * 2 + 1)
         else:
             u_action_space = spaces.Box(low=-agent.u_range,
                                         high=+agent.u_range,
                                         shape=(self.world.dim_p, ),
                                         dtype=np.float32)
         if agent.movable:
             total_action_space.append(u_action_space)
         # communication action space
         if self.discrete_action_space:
             c_action_space = spaces.Discrete(self.world.dim_c)
         else:
             c_action_space = spaces.Box(low=0.0,
                                         high=1.0,
                                         shape=(self.world.dim_c, ),
                                         dtype=np.float32)
         if not agent.silent:
             total_action_space.append(c_action_space)
         # total action space
         if len(total_action_space) > 1:
             # all action spaces are discrete, so simplify to MultiDiscrete action space
             if all([
                     isinstance(act_space, spaces.Discrete)
                     for act_space in total_action_space
             ]):
                 act_space = MultiDiscrete(
                     [[0, act_space.n - 1]
                      for act_space in total_action_space])
             else:
                 act_space = spaces.Tuple(total_action_space)
             self.action_space.append(act_space)
         else:
             self.action_space.append(total_action_space[0])
         # observation space
         obs_dim = len(self.observation_callback(agent, self.world))
         self.observation_space.append(
             spaces.Box(low=-np.inf,
                        high=+np.inf,
                        shape=(obs_dim, ),
                        dtype=np.float32))
         agent.action.c = np.zeros(self.world.dim_c)
Example #5
    def __init__(self,
                 world,
                 reset_callback=None,
                 reward_callback=None,
                 observation_callback=None,
                 info_callback=None,
                 done_callback=None,
                 shared_viewer=True):

        self.world = world
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, even the action is continuous, action will be performed discretely
        self.force_discrete_action = world.discrete_action if hasattr(
            world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = world.collaborative if hasattr(
            world, 'collaborative') else False
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                u_action_space = spaces.Box(low=-agent.u_range,
                                            high=+agent.u_range,
                                            shape=(world.dim_p, ),
                                            dtype=np.float32)
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0,
                                            high=1.0,
                                            shape=(world.dim_c, ),
                                            dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([
                        isinstance(act_space, spaces.Discrete)
                        for act_space in total_action_space
                ]):
                    act_space = MultiDiscrete(
                        [[0, act_space.n - 1]
                         for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            self.observation_space.append(
                spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(obs_dim, ),
                           dtype=np.float32))
            agent.action.c = np.zeros(self.world.dim_c)

        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n
        self._reset_render()
Example #6
    def __init__(self,
                 world,
                 reset_callback=None,
                 reward_callback=None,
                 observation_callback=None,
                 info_callback=None,
                 done_callback=None,
                 shared_viewer=True):

        self.world = world
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, even the action is continuous, action will be performed discretely
        self.force_discrete_action = world.discrete_action if hasattr(
            world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = world.collaborative if hasattr(
            world, 'collaborative') else False
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                # If the continuous action space does not work out, we can only fall back to a discrete action space; revisit later
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                # The action space needs to be redefined here: the action should be the split ratio of the incoming
                # traffic, e.g. if one path gets 0.2, the other path should get 0.8.
                # So the action space should have one dimension per next-hop node directly connected to this node,
                # followed by a normalization step (see the sketch after this example).
                # The original MADDPG code uses discrete_action_space; first check whether this Box-based code works.
                # Also, this only covers the single-egress-node case; with multiple egress nodes, we...
                u_action_space = spaces.Box(low=0,
                                            high=1.0,
                                            shape=(len(agent.next_neighbors), ),
                                            dtype=np.float32)
                # u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32)
            # if agent.movable:
            total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0,
                                            high=1.0,
                                            shape=(world.dim_c, ),
                                            dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([
                        isinstance(act_space, spaces.Discrete)
                        for act_space in total_action_space
                ]):
                    act_space = MultiDiscrete(
                        [[0, act_space.n - 1]
                         for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            self.observation_space.append(
                spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(obs_dim, ),
                           dtype=np.float32))
            agent.action.c = np.zeros(self.world.dim_c)
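Following the comments above, a minimal sketch of the normalization step that maps a raw Box action (one value per next-hop neighbor) to traffic-split ratios; the function name and the uniform fallback are assumptions, not part of the original environment:

import numpy as np

def to_split_ratios(raw_action, eps=1e-8):
    # raw_action: np.ndarray of shape (k,), one entry per next-hop neighbor, in [0, 1]
    raw_action = np.clip(raw_action, 0.0, 1.0)
    total = raw_action.sum()
    if total < eps:
        # degenerate all-zero action: fall back to a uniform split
        return np.full_like(raw_action, 1.0 / len(raw_action))
    return raw_action / total

# e.g. to_split_ratios(np.array([0.2, 0.8, 0.0])) -> array([0.2, 0.8, 0. ])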
Example #7
    def __init__(self,
                 world,
                 reset_callback=None,
                 reward_callback=None,
                 observation_callback=None,
                 info_callback=None,
                 done_callback=None,
                 seed_callback=None,
                 shared_viewer=True,
                 arguments={},
                 seed=100,
                 with_oppo_modelling=False,
                 team_mode=None,
                 agent_type=-1):
        self.with_oppo_modelling = with_oppo_modelling
        self.ob_rms = None
        self.world = world
        self.seed_val = seed
        self.randomizer = RandomState(self.seed_val)
        self.agent_type = agent_type
        self.team_mode = team_mode

        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        if self.with_oppo_modelling:
            self.prev_actions = None
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        self.seed_callback = seed_callback
        self.policy_id = None
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = True  # False
        # if true, even the action is continuous, action will be performed discretely
        self.force_discrete_action = world.discrete_action if hasattr(
            world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = world.collaborative if hasattr(
            world, 'collaborative') else False
        self.ad_hoc_agent_id = None
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.args = arguments
        self.namespace_args = Namespace(**self.args)
        self.teammate_obs = None

        # configure spaces
        self.action_spaces = []
        self.observation_spaces = []
        obs_shapes = []
        self.step_num = 0
        self.agent_num = len(self.agents)
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete((world.dim_p) * 2 + 2)  ##
            else:
                u_action_space = spaces.Box(low=-agent.u_range,
                                            high=+agent.u_range,
                                            shape=(world.dim_p, ),
                                            dtype=np.float32)
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0,
                                            high=1.0,
                                            shape=(world.dim_c, ),
                                            dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([
                        isinstance(act_space, spaces.Discrete)
                        for act_space in total_action_space
                ]):
                    act_space = MultiDiscrete(
                        [[0, act_space.n - 1]
                         for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_spaces.append(act_space)
            else:
                self.action_spaces.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            obs_shapes.append((obs_dim, ))
            self.observation_spaces.append(
                spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(obs_dim, ),
                           dtype=np.float32))
            agent.action.c = np.zeros(self.world.dim_c)

        # action has 8 values:
        # nothing, +forcex, -forcex, +forcey, -forcey, +rot, -rot, shoot
        self.action_space = Box(low=0.,
                                high=1.,
                                shape=((world.dim_p) * 2 + 2, ))  ##
        self.observation_space = Box(low=-np.inf,
                                     high=+np.inf,
                                     shape=(self.world.numAgents,
                                            obs_shapes[0][0] + 2))

        if self.with_oppo_modelling:
            self.observation_space = Box(low=-np.inf,
                                         high=+np.inf,
                                         shape=(self.world.numAgents,
                                                obs_shapes[0][0] + 3))

        # Create set of learners here
        self.master = None

        #self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.action_range = [0., 1.]
        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n

        self.prevShot, self.shot = False, False  # used for rendering
        self._reset_render()
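The 8-value action described in the comment above (nothing, +forcex, -forcex, +forcey, -forcey, +rot, -rot, shoot) can be reduced to a single discrete choice by taking the largest component; a small illustrative sketch, with the name list repeated from the comment and the helper name assumed:

import numpy as np

ACTION_NAMES = ["nothing", "+forcex", "-forcex", "+forcey", "-forcey",
                "+rot", "-rot", "shoot"]

def decode_action(action_vector):
    # pick the dominant component of an 8-dim action vector in [0, 1]
    idx = int(np.argmax(action_vector))
    return idx, ACTION_NAMES[idx]

# e.g. decode_action(np.array([0, 0, 0, 0, 0, 0, 0, 1])) -> (7, 'shoot')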
Example #8
    def __init__(self, world, reset_callback=None, reward_callback=None,
                 observation_callback=None, info_callback=None,
                 done_callback=None, shared_viewer=True):

        self.world = world
        self.world.discrete_action = True
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, even the action is continuous, action will be performed discretely
        self.force_discrete_action = world.discrete_action if hasattr(self.world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = world.collaborative if hasattr(self.world, 'collaborative') else False
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        obs_shapes = []
        self.agent_num = len(self.agents)
        ## Everything inside this loop may be unused: self.action_spaces (defined below) is used later, not self.action_space
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete((world.dim_p-1) * 2 + 3)   ##
            else:
                u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32)
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([isinstance(act_space, spaces.Discrete) for act_space in total_action_space]):
                    act_space = MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            obs_shapes.append((obs_dim,))
            self.observation_space.append(spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32))
            agent.action.c = np.zeros(self.world.dim_c)
        # simplified for non-comm game

        # self.action_spaces = MASpace(tuple(Box(low=-1., high=1., shape=(1,)) for _ in range(self.agent_num)))
        # self.observation_spaces = MASpace(tuple(Discrete(1) for _ in range(self.agent_num)))

        # action originally had 5 values - accel, +forcex, -forcex, +forcey, -forcey
        # I added 2 extra components: change in rotation angle and shoot
        self.action_spaces = MASpace(tuple(Box(low=0., high=1., shape=((world.dim_p-1) * 2 + 3,)) for _ in range(self.agent_num)))  ##
        self.observation_spaces = MASpace(tuple(Box(low=-np.inf, high=+np.inf, shape=obs_shape) for obs_shape in obs_shapes))

        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.action_range = [0., 1.]
        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n
        mixer.init()
        soundFiles = gym_fortattack.__file__[:-11]+'envs/Game/'
        # bulletFile = os.path.realpath(__file__)[:-13]+'Game/bullet.mp3'
        mixer.music.load(soundFiles+'bullet.mp3')
        # print(gym_fortattack.__file__)
        # time.sleep(5)
        self.prevShot, self.shot = False, False     # used for rendering
        self._reset_render()
Example #9
    def __init__(self,
                 world,
                 reset_callback=None,
                 reward_callback=None,
                 observation_callback=None,
                 info_callback=None,
                 done_callback=None,
                 shared_viewer=True,
                 reset_vehicle_callback=None):
        super(MultiAgentEnv,
              self).__init__(world, reset_callback, reward_callback,
                             observation_callback, info_callback,
                             done_callback, shared_viewer)

        # reset discrete action params
        self.discrete_action_space = False
        self.discrete_action_input = False
        self.force_discrete_action = False

        # reset action space
        from gym import spaces
        from multiagent.multi_discrete import MultiDiscrete
        self.action_space = []
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                u_action_space = spaces.Box(low=-agent.u_range,
                                            high=+agent.u_range,
                                            shape=(world.dim_p, ),
                                            dtype=np.float32)
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0,
                                            high=1.0,
                                            shape=(world.dim_c, ),
                                            dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([
                        isinstance(act_space, spaces.Discrete)
                        for act_space in total_action_space
                ]):
                    act_space = MultiDiscrete(
                        [[0, act_space.n - 1]
                         for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])

            self.reset_vehicle_callback = reset_vehicle_callback

            self.render_geoms = None
            self.render_geoms_xform = None
Example #10
    def __init__(self,
                 world,
                 env_spec,
                 reset_callback=None,
                 reward_callback=None,
                 observation_callback=None,
                 info_callback=None,
                 done_callback=None,
                 shared_viewer=True):

        self.world = world
        self.env_spec = env_spec
        self.agents = self.world.policy_agents
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
        self.discrete_action_input = False
        # if true, even the action is continuous, action will be performed discretely
        self.force_discrete_action = world.discrete_action if hasattr(
            world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = world.collaborative if hasattr(
            world, 'collaborative') else False
        self.time = 0

        # configure spaces
        self.action_space = []
        self.observation_space = []
        for agent in self.agents:
            total_action_space = []
            # physical action space
            if self.discrete_action_space:
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                u_action_space = spaces.Box(low=-agent.u_range,
                                            high=+agent.u_range,
                                            shape=(world.dim_p, ),
                                            dtype=np.float32)
            if agent.movable:
                total_action_space.append(u_action_space)
            # communication action space
            if self.discrete_action_space:
                c_action_space = spaces.Discrete(world.dim_c)
            else:
                c_action_space = spaces.Box(low=0.0,
                                            high=1.0,
                                            shape=(world.dim_c, ),
                                            dtype=np.float32)
            if not agent.silent:
                total_action_space.append(c_action_space)
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([
                        isinstance(act_space, spaces.Discrete)
                        for act_space in total_action_space
                ]):
                    act_space = MultiDiscrete(
                        [[0, act_space.n - 1]
                         for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            obs_dim = len(observation_callback(agent, self.world))
            self.observation_space.append(
                spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(obs_dim, ),
                           dtype=np.float32))
            agent.action.c = np.zeros(self.world.dim_c)

        # rendering
        self.shared_viewer = shared_viewer

        # import rendering only if needed. TODO Add a global rendering flag
        from multiagent import rendering
        # TODO Create a debug flag for this
        w = self.env_spec.width * 2 + 0.2
        h = self.env_spec.height * 2 + 0.2
        self.world_bound_geom = rendering.make_polygon([(0, 0), (w, 0), (w, h),
                                                        (0, h)], False)
        self.world_bound_geom.set_color(0., 0., 0., alpha=0.3)
        xform = rendering.Transform()
        xform.set_translation(-w / 2, -h / 2)
        self.world_bound_geom.add_attr(xform)
        if self.shared_viewer:
            self.viewers = [rendering.Viewer(700, 700)]
        else:
            self.viewers = [rendering.Viewer(700, 700) for i in range(self.n)]
        for i, viewer in enumerate(self.viewers):
            viewer.cam_range = self.env_spec.zoom
        self._reset_render()
Example #11
    def __init__(self,
                 world,
                 reset_callback=None,
                 reward_callback=None,
                 observation_callback=None,
                 info_callback=None,
                 done_callback=None,
                 shared_viewer=True):

        #1. world, agents, n (number of agents), time, and two lists: action_space and observation_space
        self.world = world
        self.agents = self.world.policy_agents
        self.time = 0
        # set required vectorized gym env property
        self.n = len(world.policy_agents)
        self.action_space = []
        self.observation_space = []

        # scenario callbacks
        self.reset_callback = reset_callback
        self.reward_callback = reward_callback
        self.observation_callback = observation_callback
        self.info_callback = info_callback
        self.done_callback = done_callback
        # environment parameters
        self.discrete_action_space = True
        # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector

        # one-hot encoding turns a categorical variable into a 0/1 indicator vector with a single 1
        # see reference: https://hackernoon.com/what-is-one-hot-encoding-why-and-when-do-you-have-to-use-it-e3c6186d008f

        self.discrete_action_input = False
        # if true, even the action is continuous, action will be performed discretely
        self.force_discrete_action = world.discrete_action if hasattr(
            world, 'discrete_action') else False
        # if true, every agent has the same reward
        self.shared_reward = world.collaborative if hasattr(
            world, 'collaborative') else False

        ## configure spaces
        self.action_space = []
        self.observation_space = []
        #2. useful, 2.1, 2.2, 2.4, 2.5
        for agent in self.agents:
            total_action_space = []  # holds the physical action space and the communication action space
            # physical action space

            ##2.1
            # physical action space
            if self.discrete_action_space:
                # dim_p is 2 for the 2-dimensional space used here; it can be changed on the world class,
                # which also holds the communication channel dimension (useful later for language training)

                # u_action_space is a Discrete space; Discrete(world.dim_p * 2 + 1) gives 5 actions here
                u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            else:
                ## reference to : https://stackoverflow.com/questions/44404281/openai-gym-understanding-action-space-notation-spaces-box
                u_action_space = spaces.Box(low=-agent.u_range,
                                            high=+agent.u_range,
                                            shape=(world.dim_p, ),
                                            dtype=np.float32)

            #2.2
            if agent.movable:
                ## u_action_space covers a no-op plus up, down, left and right
                total_action_space.append(u_action_space)
            # communication action space

            #2.3

            if self.discrete_action_space:
                ## TODO why not * 2 + 1, the same as the physical action?
                c_action_space = spaces.Discrete(world.dim_c * 2 + 1)
            else:
                c_action_space = spaces.Box(low=0.0,
                                            high=1.0,
                                            shape=(world.dim_c, ),
                                            dtype=np.float32)

            #2.4
            ## with the current settings the agent can send signals, so we add a communication action space
            if not agent.silent:
                total_action_space.append(c_action_space)

            #2.5
            # total action space
            if len(total_action_space) > 1:
                # all action spaces are discrete, so simplify to MultiDiscrete action space
                if all([
                        isinstance(act_space, spaces.Discrete)
                        for act_space in total_action_space
                ]):
                    ## check the source code: we do not have the communication channel [0, -1], only the physical action [0, 4]
                    act_space = MultiDiscrete(
                        [[0, act_space.n - 1]
                         for act_space in total_action_space])
                else:
                    act_space = spaces.Tuple(total_action_space)
                self.action_space.append(act_space)
            else:
                self.action_space.append(total_action_space[0])
            # observation space
            ## TODO
            ## the default value for observation_callback is None, so the scenario may need to be checked
            obs_dim = len(observation_callback(agent, self.world))
            ## look further into the gym Box space
            ## are observation values really unbounded between -inf and +inf?
            self.observation_space.append(
                spaces.Box(low=-np.inf,
                           high=+np.inf,
                           shape=(obs_dim, ),
                           dtype=np.float32))

            ## currently a zero array of size dim_c (size 0 if the world has no communication channel)
            agent.action.c = np.zeros(self.world.dim_c)

        # rendering
        self.shared_viewer = shared_viewer
        if self.shared_viewer:
            self.viewers = [None]
        else:
            self.viewers = [None] * self.n
        self._reset_render()
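For the one-hot comment above: with discrete_action_input=False the environment expects each discrete choice as a one-hot vector rather than an index. A minimal sketch of that encoding (the helper name is illustrative):

import numpy as np

def one_hot(index, n):
    # encode a discrete action index as an n-dim vector of 0s with a single 1
    vec = np.zeros(n, dtype=np.float32)
    vec[index] = 1.0
    return vec

# e.g. with 5 physical actions, action index 3 becomes:
# one_hot(3, 5) -> array([0., 0., 0., 1., 0.], dtype=float32)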
Example #12
    def new_act_space(self):
        total_action_space = []
        u_act_space = spaces.Discrete(self.world.dim_p * 2 + 1)
        total_action_space.append(u_act_space)
        return MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space])
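A brief usage note, assuming an env object exposing this method and world.dim_p, and assuming the MultiDiscrete helper provides sample(): with dim_p = 2 the returned space has a single component with 5 choices (a no-op plus the four movement directions), matching the physical action spaces in the examples above.

# hypothetical usage; env and its world.dim_p attribute are assumed
act_space = env.new_act_space()
action = act_space.sample()  # e.g. [3]: a single component in 0..4 when world.dim_p == 2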