def __init__(self, display=True):
    # Set up bindings so this class can be used as a gym environment.
    act_space1 = MultiDiscrete([[0, 4], [0, 2]])  # action space of agent 1: 5 movement actions and 3 communications
    act_space2 = MultiDiscrete([[0, 4], [0, 2]])  # action space of agent 2: 5 movement actions and 3 communications
    self.action_space = [act_space1, act_space2]  # combined action space of both agents
    self.display = display
    # Observation of an agent:
    # 1. Its own x and y velocity (2)
    # 2. Relative distances from landmarks (3*5) and their nature (one-hot encoded) (assume perfect detection)
    #    Relative distances are only activated once the agent has actually gone close enough to the object;
    #    if the agent has not gone close to the object, a placeholder like [-1, -1, -1] is used.
    #    Relative distances are arranged in a common order for both agents.
    # 3. Target color code for depth agent (3) / target shape code for color agent (3) (one-hot encoded)
    # 4. 10 communication channels (10)
    self.observation_space = []
    self.observation_space.append(
        spaces.Box(low=-np.inf, high=+np.inf, shape=(18, ), dtype=np.float32))
    self.observation_space.append(
        spaces.Box(low=-np.inf, high=+np.inf, shape=(18, ), dtype=np.float32))
    self.input_state_size = 18
    self.action_size = 5 + 3
    self.num_comm = 3
    self.n = 2  # 2 agents
    self.prev_pos = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    self.prev_pos_rel = []
    self.prev_rew = [0.0, 0.0]
    self.numcols = 0
    self.numtries = 0
    self.reset_string = "go on"
    # lists storing metrics
    self.m1 = []
    self.M1 = []
    self.m2 = []
    self.M2 = []
    self.R = Render(400, 300, self.display)
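# Aside (a minimal sketch, not part of any snippet): the MultiDiscrete used
# throughout these constructors is the multiagent-particle-envs variant, which is
# built from [min, max] pairs per component, unlike gym.spaces.MultiDiscrete,
# which takes a vector of component sizes.
from multiagent.multi_discrete import MultiDiscrete

act_space = MultiDiscrete([[0, 4], [0, 2]])  # movement index in 0..4, comm index in 0..2
sample = act_space.sample()                  # e.g. [3, 1]
assert act_space.contains(sample)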
def __init__(self, world):
    self.world = world
    self.agents = self.world.agents
    # set required vectorized gym env property
    self.n = len(world.agents)
    # scenario callbacks
    # if true, every agent has the same reward
    self.shared_reward = True
    self.time = 0
    # configure spaces
    self.action_space = []
    self.observation_space = []
    for agent in self.agents:
        total_action_space = []
        action_space = spaces.Discrete(world.dim_action)
        total_action_space.append(action_space)
        act_space = MultiDiscrete([[0, act_space.n - 1]
                                   for act_space in total_action_space])
        self.action_space.append(act_space)
        # observation space
        obs_dim = len(world.observation(agent))
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim, ), dtype=np.float32))
def __init__(self, world, reset_callback=None, reward_callback=None,
             observation_callback=None, info_callback=None, done_callback=None):
    self.world = world
    self.agents = self.world.policy_agents
    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback
    # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
    self.discrete_action_input = False
    # if true, every agent has the same reward
    self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
    self.time = 0
    # configure spaces
    self.action_space = []
    self.observation_space = []
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if agent.movable:
            u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
            total_action_space.append(u_action_space)
        # arms action space
        if agent.armed:
            a_action_space = spaces.Discrete(5)  # inop, left, right, trigger, safety
            total_action_space.append(a_action_space)
        # total action space
        if len(total_action_space) > 1:
            # all action spaces are discrete, so simplify to MultiDiscrete action space
            act_space = MultiDiscrete([[0, act_space.n - 1]
                                       for act_space in total_action_space])
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
        # observation space
        obs_dim = len(observation_callback(agent, self.world))
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim, ), dtype=np.float32))
        agent.action.a = np.zeros(2)
    # rendering
    self.viewers = [None]
    self._reset_render()
def configure_spaces(self):
    # configure spaces
    self.action_space = []
    self.observation_space = []
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if self.discrete_action_space:
            u_action_space = spaces.Discrete(self.world.dim_p * 2 + 1)
        else:
            u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range,
                                        shape=(self.world.dim_p, ), dtype=np.float32)
        if agent.movable:
            total_action_space.append(u_action_space)
        # communication action space
        if self.discrete_action_space:
            c_action_space = spaces.Discrete(self.world.dim_c)
        else:
            c_action_space = spaces.Box(low=0.0, high=1.0,
                                        shape=(self.world.dim_c, ), dtype=np.float32)
        if not agent.silent:
            total_action_space.append(c_action_space)
        # total action space
        if len(total_action_space) > 1:
            # all action spaces are discrete, so simplify to MultiDiscrete action space
            if all([isinstance(act_space, spaces.Discrete)
                    for act_space in total_action_space]):
                act_space = MultiDiscrete([[0, act_space.n - 1]
                                           for act_space in total_action_space])
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
        # observation space
        obs_dim = len(self.observation_callback(agent, self.world))
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim, ), dtype=np.float32))
        agent.action.c = np.zeros(self.world.dim_c)
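# A worked instance (dim values assumed, not from the source) of the two branches
# above: in a 2-D world (dim_p = 2) the discrete physical action space is
# Discrete(2 * 2 + 1) = Discrete(5), a no-op plus +/- force on each axis, while
# the continuous variant is a Box over the same two force components.
from gym import spaces
import numpy as np

u_range = 1.0  # hypothetical per-agent force range
discrete_u = spaces.Discrete(2 * 2 + 1)  # action indices 0..4
continuous_u = spaces.Box(low=-u_range, high=+u_range, shape=(2, ), dtype=np.float32)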
def __init__(self, world, reset_callback=None, reward_callback=None,
             observation_callback=None, info_callback=None, done_callback=None,
             shared_viewer=True):
    self.world = world
    self.agents = self.world.policy_agents
    # set required vectorized gym env property
    self.n = len(world.policy_agents)
    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback
    # environment parameters
    self.discrete_action_space = True
    # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
    self.discrete_action_input = False
    # if true, even if the action is continuous, it will be performed discretely
    self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
    # if true, every agent has the same reward
    self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
    self.time = 0
    # configure spaces
    self.action_space = []
    self.observation_space = []
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if self.discrete_action_space:
            u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
        else:
            u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range,
                                        shape=(world.dim_p, ), dtype=np.float32)
        if agent.movable:
            total_action_space.append(u_action_space)
        # communication action space
        if self.discrete_action_space:
            c_action_space = spaces.Discrete(world.dim_c)
        else:
            c_action_space = spaces.Box(low=0.0, high=1.0,
                                        shape=(world.dim_c, ), dtype=np.float32)
        if not agent.silent:
            total_action_space.append(c_action_space)
        # total action space
        if len(total_action_space) > 1:
            # all action spaces are discrete, so simplify to MultiDiscrete action space
            if all([isinstance(act_space, spaces.Discrete)
                    for act_space in total_action_space]):
                act_space = MultiDiscrete([[0, act_space.n - 1]
                                           for act_space in total_action_space])
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
        # observation space
        obs_dim = len(observation_callback(agent, self.world))
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim, ), dtype=np.float32))
        agent.action.c = np.zeros(self.world.dim_c)
    # rendering
    self.shared_viewer = shared_viewer
    if self.shared_viewer:
        self.viewers = [None]
    else:
        self.viewers = [None] * self.n
    self._reset_render()
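# A usage sketch under the usual multiagent-particle-envs conventions (the
# scenario name is illustrative): the constructor above is driven entirely by
# the scenario callbacks passed into it.
import multiagent.scenarios as scenarios
from multiagent.environment import MultiAgentEnv

scenario = scenarios.load("simple_spread.py").Scenario()
world = scenario.make_world()
env = MultiAgentEnv(world,
                    reset_callback=scenario.reset_world,
                    reward_callback=scenario.reward,
                    observation_callback=scenario.observation)
obs_n = env.reset()  # one observation per agent; len(obs_n) == env.n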
def __init__(self, world, reset_callback=None, reward_callback=None,
             observation_callback=None, info_callback=None, done_callback=None,
             shared_viewer=True):
    self.world = world
    self.agents = self.world.policy_agents
    # set required vectorized gym env property
    self.n = len(world.policy_agents)
    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback
    # environment parameters
    self.discrete_action_space = True
    # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
    self.discrete_action_input = False
    # if true, even if the action is continuous, it will be performed discretely
    self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
    # if true, every agent has the same reward
    self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
    self.time = 0
    # configure spaces
    self.action_space = []
    self.observation_space = []
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if self.discrete_action_space:
            # if the continuous action space does not work out, we can only fall
            # back to the discrete action space; revisit this later
            u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
        else:
            # the action space needs to be redefined here: the action should be the
            # split ratio of incoming traffic, e.g. if one path gets 0.2, the other
            # should get 0.8
            # so the action space size should equal the number of next-hop nodes
            # directly connected to this node, followed by a normalization step
            # the original MADDPG code uses discrete_action_space; first check
            # whether this Box code works correctly
            # also, this is written for the single-egress-node case; with multiple
            # egress nodes, we
            u_action_space = spaces.Box(low=0, high=1.0,
                                        shape=(len(agent.next_neighbors), ),
                                        dtype=np.float32)
        # u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range, shape=(world.dim_p,), dtype=np.float32)
        # if agent.movable:
        total_action_space.append(u_action_space)
        # communication action space
        if self.discrete_action_space:
            c_action_space = spaces.Discrete(world.dim_c)
        else:
            c_action_space = spaces.Box(low=0.0, high=1.0,
                                        shape=(world.dim_c, ), dtype=np.float32)
        if not agent.silent:
            total_action_space.append(c_action_space)
        # total action space
        if len(total_action_space) > 1:
            # all action spaces are discrete, so simplify to MultiDiscrete action space
            if all([isinstance(act_space, spaces.Discrete)
                    for act_space in total_action_space]):
                act_space = MultiDiscrete([[0, act_space.n - 1]
                                           for act_space in total_action_space])
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
        # observation space
        obs_dim = len(observation_callback(agent, self.world))
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim, ), dtype=np.float32))
        agent.action.c = np.zeros(self.world.dim_c)
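# A minimal sketch of the normalization step the translated comments above call
# for (function name and fallback policy are assumptions): map the raw Box
# action to a traffic split over the next-hop neighbors that sums to 1.
import numpy as np

def normalize_split(raw_action):
    """Turn a raw action in [0, 1]^k into a split ratio over k next hops."""
    raw = np.clip(np.asarray(raw_action, dtype=np.float32), 0.0, 1.0)
    total = raw.sum()
    if total <= 0.0:
        return np.full_like(raw, 1.0 / raw.size)  # degenerate input: even split
    return raw / total

normalize_split([0.2, 0.8, 0.0])  # -> array([0.2, 0.8, 0. ], dtype=float32)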
def __init__(self, world, reset_callback=None, reward_callback=None,
             observation_callback=None, info_callback=None, done_callback=None,
             seed_callback=None, shared_viewer=True, arguments={}, seed=100,
             with_oppo_modelling=False, team_mode=None, agent_type=-1):
    self.with_oppo_modelling = with_oppo_modelling
    self.ob_rms = None
    self.world = world
    self.seed_val = seed
    self.randomizer = RandomState(self.seed_val)
    self.agent_type = agent_type
    self.team_mode = team_mode
    self.agents = self.world.policy_agents
    # set required vectorized gym env property
    self.n = len(world.policy_agents)
    if self.with_oppo_modelling:
        self.prev_actions = None
    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback
    self.seed_callback = seed_callback
    self.policy_id = None
    # environment parameters
    self.discrete_action_space = True
    # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
    self.discrete_action_input = True  # False
    # if true, even if the action is continuous, it will be performed discretely
    self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
    # if true, every agent has the same reward
    self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
    self.ad_hoc_agent_id = None
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.args = arguments
    self.namespace_args = Namespace(**self.args)
    self.teammate_obs = None
    # configure spaces
    self.action_spaces = []
    self.observation_spaces = []
    obs_shapes = []
    self.step_num = 0
    self.agent_num = len(self.agents)
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if self.discrete_action_space:
            u_action_space = spaces.Discrete((world.dim_p) * 2 + 2)  ##
        else:
            u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range,
                                        shape=(world.dim_p, ), dtype=np.float32)
        if agent.movable:
            total_action_space.append(u_action_space)
        # communication action space
        if self.discrete_action_space:
            c_action_space = spaces.Discrete(world.dim_c)
        else:
            c_action_space = spaces.Box(low=0.0, high=1.0,
                                        shape=(world.dim_c, ), dtype=np.float32)
        if not agent.silent:
            total_action_space.append(c_action_space)
        # total action space
        if len(total_action_space) > 1:
            # all action spaces are discrete, so simplify to MultiDiscrete action space
            if all([isinstance(act_space, spaces.Discrete)
                    for act_space in total_action_space]):
                act_space = MultiDiscrete([[0, act_space.n - 1]
                                           for act_space in total_action_space])
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_spaces.append(act_space)
        else:
            self.action_spaces.append(total_action_space[0])
        # observation space
        obs_dim = len(observation_callback(agent, self.world))
        obs_shapes.append((obs_dim, ))
        self.observation_spaces.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim, ), dtype=np.float32))
        agent.action.c = np.zeros(self.world.dim_c)
    # action has 8 values:
    # nothing, +forcex, -forcex, +forcey, -forcey, +rot, -rot, shoot
    self.action_space = Box(low=0., high=1., shape=((world.dim_p) * 2 + 2, ))  ##
    self.observation_space = Box(low=-np.inf, high=+np.inf,
                                 shape=(self.world.numAgents, obs_shapes[0][0] + 2))
    if self.with_oppo_modelling:
        self.observation_space = Box(low=-np.inf, high=+np.inf,
                                     shape=(self.world.numAgents, obs_shapes[0][0] + 3))
    # Create set of learners here
    self.master = None
    # self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    self.action_range = [0., 1.]
    # rendering
    self.shared_viewer = shared_viewer
    if self.shared_viewer:
        self.viewers = [None]
    else:
        self.viewers = [None] * self.n
    self.prevShot, self.shot = False, False  # used for rendering
    self._reset_render()
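# A decoding sketch (function name is hypothetical) for the 8-value action
# layout noted above -- nothing, +forcex, -forcex, +forcey, -forcey, +rot,
# -rot, shoot -- turning raw activations into a force vector, a rotation
# delta, and a shoot flag.
import numpy as np

def decode_action(a):
    a = np.asarray(a, dtype=np.float32)
    force = np.array([a[1] - a[2], a[3] - a[4]])  # net force along x and y
    rot = a[5] - a[6]                             # net change in rotation angle
    shoot = bool(a[7] > 0.5)                      # threshold the trigger channel
    return force, rot, shoot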
def __init__(self, world, reset_callback=None, reward_callback=None,
             observation_callback=None, info_callback=None, done_callback=None,
             shared_viewer=True):
    self.world = world
    self.world.discrete_action = True
    self.agents = self.world.policy_agents
    # set required vectorized gym env property
    self.n = len(world.policy_agents)
    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback
    # environment parameters
    self.discrete_action_space = True
    # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
    self.discrete_action_input = False
    # if true, even if the action is continuous, it will be performed discretely
    self.force_discrete_action = world.discrete_action if hasattr(self.world, 'discrete_action') else False
    # if true, every agent has the same reward
    self.shared_reward = world.collaborative if hasattr(self.world, 'collaborative') else False
    self.time = 0
    # configure spaces
    self.action_space = []
    self.observation_space = []
    obs_shapes = []
    self.agent_num = len(self.agents)
    ## I think everything inside this loop is unused; self.action_spaces is used
    ## later, not self.action_space
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if self.discrete_action_space:
            u_action_space = spaces.Discrete((world.dim_p - 1) * 2 + 3)  ##
        else:
            u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range,
                                        shape=(world.dim_p,), dtype=np.float32)
        if agent.movable:
            total_action_space.append(u_action_space)
        # communication action space
        if self.discrete_action_space:
            c_action_space = spaces.Discrete(world.dim_c)
        else:
            c_action_space = spaces.Box(low=0.0, high=1.0,
                                        shape=(world.dim_c,), dtype=np.float32)
        if not agent.silent:
            total_action_space.append(c_action_space)
        # total action space
        if len(total_action_space) > 1:
            # all action spaces are discrete, so simplify to MultiDiscrete action space
            if all([isinstance(act_space, spaces.Discrete)
                    for act_space in total_action_space]):
                act_space = MultiDiscrete([[0, act_space.n - 1]
                                           for act_space in total_action_space])
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
        # observation space
        obs_dim = len(observation_callback(agent, self.world))
        obs_shapes.append((obs_dim,))
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32))
        agent.action.c = np.zeros(self.world.dim_c)
    # simplified for non-comm game
    # self.action_spaces = MASpace(tuple(Box(low=-1., high=1., shape=(1,)) for _ in range(self.agent_num)))
    # self.observation_spaces = MASpace(tuple(Discrete(1) for _ in range(self.agent_num)))
    # action originally had 5 values: accel, +forcex, -forcex, +forcey, -forcey
    # I added 2 extra components: change in rotation angle and shoot
    self.action_spaces = MASpace(tuple(Box(low=0., high=1., shape=((world.dim_p - 1) * 2 + 3,))
                                       for _ in range(self.agent_num)))  ##
    self.observation_spaces = MASpace(tuple(Box(low=-np.inf, high=+np.inf, shape=obs_shape)
                                            for obs_shape in obs_shapes))
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    self.action_range = [0., 1.]
    # rendering
    self.shared_viewer = shared_viewer
    if self.shared_viewer:
        self.viewers = [None]
    else:
        self.viewers = [None] * self.n
    mixer.init()
    soundFiles = gym_fortattack.__file__[:-11] + 'envs/Game/'
    # bulletFile = os.path.realpath(__file__)[:-13]+'Game/bullet.mp3'
    mixer.music.load(soundFiles + 'bullet.mp3')
    # print(gym_fortattack.__file__)
    # time.sleep(5)
    self.prevShot, self.shot = False, False  # used for rendering
    self._reset_render()
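# A quick size check (dim_p value assumed): with world.dim_p = 3 the per-agent
# Box above has (3 - 1) * 2 + 3 = 7 components, matching the comment's five
# original values (accel, +/-forcex, +/-forcey) plus the added rotation and
# shoot components.
from gym.spaces import Box

dim_p = 3  # assumption for illustration
assert Box(low=0., high=1., shape=((dim_p - 1) * 2 + 3, )).shape == (7, )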
def __init__(self, world, reset_callback=None, reward_callback=None,
             observation_callback=None, info_callback=None, done_callback=None,
             shared_viewer=True, reset_vehicle_callback=None):
    super(MultiAgentEnv, self).__init__(world, reset_callback, reward_callback,
                                        observation_callback, info_callback,
                                        done_callback, shared_viewer)
    # reset discrete action params
    self.discrete_action_space = False
    self.discrete_action_input = False
    self.force_discrete_action = False
    # reset action space
    from gym import spaces
    from multiagent.multi_discrete import MultiDiscrete
    self.action_space = []
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if self.discrete_action_space:
            u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
        else:
            u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range,
                                        shape=(world.dim_p, ), dtype=np.float32)
        if agent.movable:
            total_action_space.append(u_action_space)
        # communication action space
        if self.discrete_action_space:
            c_action_space = spaces.Discrete(world.dim_c)
        else:
            c_action_space = spaces.Box(low=0.0, high=1.0,
                                        shape=(world.dim_c, ), dtype=np.float32)
        if not agent.silent:
            total_action_space.append(c_action_space)
        # total action space
        if len(total_action_space) > 1:
            # all action spaces are discrete, so simplify to MultiDiscrete action space
            if all([isinstance(act_space, spaces.Discrete)
                    for act_space in total_action_space]):
                act_space = MultiDiscrete([[0, act_space.n - 1]
                                           for act_space in total_action_space])
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
    self.reset_vehicle_callback = reset_vehicle_callback
    self.render_geoms = None
    self.render_geoms_xform = None
def __init__(self, world, env_spec, reset_callback=None, reward_callback=None,
             observation_callback=None, info_callback=None, done_callback=None,
             shared_viewer=True):
    self.world = world
    self.env_spec = env_spec
    self.agents = self.world.policy_agents
    # set required vectorized gym env property
    self.n = len(world.policy_agents)
    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback
    # environment parameters
    self.discrete_action_space = True
    # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
    self.discrete_action_input = False
    # if true, even if the action is continuous, it will be performed discretely
    self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
    # if true, every agent has the same reward
    self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
    self.time = 0
    # configure spaces
    self.action_space = []
    self.observation_space = []
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if self.discrete_action_space:
            u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
        else:
            u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range,
                                        shape=(world.dim_p, ), dtype=np.float32)
        if agent.movable:
            total_action_space.append(u_action_space)
        # communication action space
        if self.discrete_action_space:
            c_action_space = spaces.Discrete(world.dim_c)
        else:
            c_action_space = spaces.Box(low=0.0, high=1.0,
                                        shape=(world.dim_c, ), dtype=np.float32)
        if not agent.silent:
            total_action_space.append(c_action_space)
        # total action space
        if len(total_action_space) > 1:
            # all action spaces are discrete, so simplify to MultiDiscrete action space
            if all([isinstance(act_space, spaces.Discrete)
                    for act_space in total_action_space]):
                act_space = MultiDiscrete([[0, act_space.n - 1]
                                           for act_space in total_action_space])
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
        # observation space
        obs_dim = len(observation_callback(agent, self.world))
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim, ), dtype=np.float32))
        agent.action.c = np.zeros(self.world.dim_c)
    # rendering
    self.shared_viewer = shared_viewer
    # import rendering only if needed. TODO Add a global rendering flag
    from multiagent import rendering
    # TODO Create a debug flag for this
    w = self.env_spec.width * 2 + 0.2
    h = self.env_spec.height * 2 + 0.2
    self.world_bound_geom = rendering.make_polygon([(0, 0), (w, 0), (w, h), (0, h)], False)
    self.world_bound_geom.set_color(0., 0., 0., alpha=0.3)
    xform = rendering.Transform()
    xform.set_translation(-w / 2, -h / 2)
    self.world_bound_geom.add_attr(xform)
    if self.shared_viewer:
        self.viewers = [rendering.Viewer(700, 700)]
    else:
        # iterate over range(self.n); iterating over the int itself is a TypeError
        self.viewers = [rendering.Viewer(700, 700) for i in range(self.n)]
    for i, viewer in enumerate(self.viewers):
        viewer.cam_range = self.env_spec.zoom
    self._reset_render()
def __init__(self, world, reset_callback=None, reward_callback=None,
             observation_callback=None, info_callback=None, done_callback=None,
             shared_viewer=True):
    # 1. world, agents, n (number of agents), time, and two lists: action space and observation space
    self.world = world
    self.agents = self.world.policy_agents
    self.time = 0
    # set required vectorized gym env property
    self.n = len(world.policy_agents)
    self.action_space = []
    self.observation_space = []
    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback
    # environment parameters
    self.discrete_action_space = True
    # if true, action is a number 0...N, otherwise action is a one-hot N-dimensional vector
    # one-hot means encoding the categorical variable with 0s and a single 1 for existence
    # // see reference: https://hackernoon.com/what-is-one-hot-encoding-why-and-when-do-you-have-to-use-it-e3c6186d008f
    self.discrete_action_input = False
    # if true, even if the action is continuous, it will be performed discretely
    self.force_discrete_action = world.discrete_action if hasattr(world, 'discrete_action') else False
    # if true, every agent has the same reward
    self.shared_reward = world.collaborative if hasattr(world, 'collaborative') else False
    ## configure spaces
    self.action_space = []
    self.observation_space = []
    # 2. useful, 2.1, 2.2, 2.4, 2.5
    for agent in self.agents:
        total_action_space = []  # includes agent_action_space and communication_action_space
        # 2.1 physical action space
        if self.discrete_action_space:
            # dim_p is 2 for the 2-dimensional space in this case; we can also change the
            # value on the world class, which also holds the communication channel dim
            # we might also use it when doing the language training
            # here u_action_space is of Discrete type; the argument in the
            # parentheses evaluates to 5
            u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
        else:
            ## reference: https://stackoverflow.com/questions/44404281/openai-gym-understanding-action-space-notation-spaces-box
            u_action_space = spaces.Box(low=-agent.u_range, high=+agent.u_range,
                                        shape=(world.dim_p, ), dtype=np.float32)
        # 2.2
        if agent.movable:  ## u_action_space might be up, down, left and right
            total_action_space.append(u_action_space)
        # 2.3 communication action space
        if self.discrete_action_space:
            ## TODO: why not * 2 + 1, the same as the physical action?
            c_action_space = spaces.Discrete(world.dim_c * 2 + 1)
        else:
            c_action_space = spaces.Box(low=0.0, high=1.0,
                                        shape=(world.dim_c, ), dtype=np.float32)
        # 2.4
        ## the agent can send a signal with the current setting, so we have a communication action space
        if not agent.silent:
            total_action_space.append(c_action_space)
        # 2.5 total action space
        if len(total_action_space) > 1:
            # all action spaces are discrete, so simplify to MultiDiscrete action space
            if all([isinstance(act_space, spaces.Discrete)
                    for act_space in total_action_space]):
                ## check the source code: without the communication channel [0, -1],
                ## we only have the physical action [0, 4]
                act_space = MultiDiscrete([[0, act_space.n - 1]
                                           for act_space in total_action_space])
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
        # observation space
        ## TODO: the default value for observation_callback is None; might need to check the scenario
        obs_dim = len(observation_callback(agent, self.world))
        ## check more on the Box in gym
        ## really a value between -inf and +inf for the observation?
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim, ), dtype=np.float32))
        ## currently a zero array
        agent.action.c = np.zeros(self.world.dim_c)
    # rendering
    self.shared_viewer = shared_viewer
    if self.shared_viewer:
        self.viewers = [None]
    else:
        self.viewers = [None] * self.n
    self._reset_render()
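# A worked instance (dims assumed) of the MultiDiscrete bounds built in the
# loop above: for a movable, non-silent agent with world.dim_p = 2 and
# world.dim_c = 3, the sub-spaces are Discrete(5) and Discrete(7), so the
# combined bounds are [[0, 4], [0, 6]].
from gym import spaces

total_action_space = [spaces.Discrete(2 * 2 + 1), spaces.Discrete(3 * 2 + 1)]
assert [[0, s.n - 1] for s in total_action_space] == [[0, 4], [0, 6]]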
def new_act_space(self):
    total_action_space = []
    u_act_space = spaces.Discrete(self.world.dim_p * 2 + 1)
    total_action_space.append(u_act_space)
    return MultiDiscrete([[0, act_space.n - 1] for act_space in total_action_space])
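# A usage sketch (the env instance is assumed): for a 2-D world the helper
# returns a one-component MultiDiscrete covering the five physical actions.
act_space = env.new_act_space()  # MultiDiscrete([[0, 4]]) when world.dim_p == 2
action = act_space.sample()      # e.g. [2]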