def __init__(
    self,
    agent_num,
    game_name="pbeauty",
    p=0.67,
    reward_type="abs",
    action_range=(-1.0, 1.0),
):
    self.agent_num = agent_num
    self.p = p
    self.game_name = game_name
    self.reward_type = reward_type
    self.action_range = action_range
    self.action_spaces = MASpace(
        tuple(Box(low=-1.0, high=1.0, shape=(1,)) for _ in range(self.agent_num))
    )
    self.observation_spaces = MASpace(
        tuple(Discrete(1) for _ in range(self.agent_num))
    )
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    self.t = 0
    self.rewards = np.zeros((self.agent_num,))

    if self.game_name not in PBeautyGame.get_game_list():
        raise EnvironmentNotFound(f"The game {self.game_name} doesn't exist")

    if self.game_name == "pbeauty":
        if (
            self.reward_type
            not in PBeautyGame.get_game_list()[self.game_name]["reward_type"]
        ):
            raise RewardTypeNotFound(
                f"The reward type {self.reward_type} doesn't exist"
            )
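# Illustrative usage sketch: assumes this __init__ belongs to the PBeautyGame class
# referenced above and that the class follows the usual MALib env interface; the
# argument values below are examples only.
#
#     env = PBeautyGame(agent_num=10, p=0.67, reward_type="abs")
#     # env.env_specs now pairs, per agent, a Discrete(1) observation space with a
#     # Box(-1, 1) action space, exactly as constructed above.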
def __init__(self):
    self.env = gym.make("LunarLanderContinuous-v2")
    self.agent_num = 2
    self.observation_spaces = MASpace(
        tuple(self.env.observation_space for _ in range(self.agent_num))
    )
    self.action_spaces = MASpace(
        tuple(Box(low=-1.0, high=1.0, shape=(1,)) for _ in range(self.agent_num))
    )
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
def __init__(self, agent_num, height):
    self.agent_num = agent_num
    self.height = height
    obs_dim = height * 4 + 1
    self.action_range = [0, 1, 2]
    self.action_spaces = MASpace(
        tuple(
            Box(low=0, high=2, shape=(1,), dtype=np.int32)
            for _ in range(self.agent_num)
        )
    )
    self.observation_spaces = MASpace(
        tuple(
            Box(low=0, high=1, shape=(obs_dim,), dtype=np.int32)
            for _ in range(self.agent_num)
        )
    )
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    self.grid = []
    self.busy_n = {}
def __init__(self, env, agent_num=None, action_space=None, observation_space=None):
    self.env = env
    self.action_space = action_space
    self.observation_space = observation_space
    self.agent_num = agent_num

    if self.agent_num is None:
        if hasattr(self.env, "agent_num"):
            self.agent_num = self.env.agent_num
        if hasattr(self.env, "n"):
            self.agent_num = self.env.n
        if hasattr(self.env, "n_agents"):
            self.agent_num = self.env.n_agents

    self.action_space = self.action_spaces = MASpace(
        tuple(self.action_space for _ in range(self.agent_num))
    )

    if self.observation_space is None:
        obs_dim = self.env.get_obs_size()
        self.observation_spaces = MASpace(
            tuple(
                gym.spaces.Box(
                    low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32
                )
                for _ in range(self.agent_num)
            )
        )
    else:
        self.observation_spaces = MASpace(
            tuple(self.observation_space for _ in range(self.agent_num))
        )

    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
def __init__(self):
    self.agent_num = 1
    self.x = 0
    self.y = 50
    self.theta = 0
    self.v = 2
    self.w = 0
    self.stoch = 0
    self.t = 0
    obs_lows = np.array([0.0, 0.0, -np.pi / 3.0, 2.0, -1.0])
    obs_highs = np.array([50.0, 100.0, np.pi / 3.0, 5.0, 1.0])
    self.observation_spaces = MASpace(tuple([Box(low=obs_lows, high=obs_highs)]))
    self.action_spaces = MASpace(tuple([Box(low=-1.0, high=1.0, shape=(2,))]))
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    self.avs = [0] * 10
    self.aws = [0] * 10
    # Rewards obtained in the current episode, in sequential order.
    self.rewards = []
def __init__(
    self,
    game_name,
    agent_num,
    action_num,
    payoff=None,
    repeated=False,
    max_step=25,
    memory=0,
    discrete_action=True,
    tuple_obs=True,
):
    self.game_name = game_name
    self.agent_num = agent_num
    self.action_num = action_num
    self.discrete_action = discrete_action
    self.tuple_obs = tuple_obs

    game_list = MatrixGame.get_game_list()

    if self.game_name not in game_list:
        raise EnvironmentNotFound(f"The game {self.game_name} doesn't exist")

    expt_num_agent = game_list[self.game_name]["agent_num"]
    expt_num_action = game_list[self.game_name]["action_num"]
    if expt_num_agent != self.agent_num:
        raise WrongNumberOfAgent(
            f"The number of agents required for {self.game_name} is {expt_num_agent}"
        )
    if expt_num_action != self.action_num:
        raise WrongNumberOfAction(
            f"The number of actions required for {self.game_name} is {expt_num_action}"
        )

    self.action_spaces = MASpace(
        tuple(Box(low=-1.0, high=1.0, shape=(1,)) for _ in range(self.agent_num))
    )
    self.observation_spaces = MASpace(
        tuple(Discrete(1) for _ in range(self.agent_num))
    )

    if self.discrete_action:
        self.action_spaces = MASpace(
            tuple(Discrete(action_num) for _ in range(self.agent_num))
        )
        if memory == 0:
            self.observation_spaces = MASpace(
                tuple(Discrete(1) for _ in range(self.agent_num))
            )
        elif memory == 1:
            self.observation_spaces = MASpace(
                tuple(Discrete(5) for _ in range(self.agent_num))
            )
    else:
        self.action_range = [-1.0, 1.0]
        self.action_spaces = MASpace(
            tuple(Box(low=-1.0, high=1.0, shape=(1,)) for _ in range(self.agent_num))
        )
        if memory == 0:
            self.observation_spaces = MASpace(
                tuple(Discrete(1) for _ in range(self.agent_num))
            )
        elif memory == 1:
            self.observation_spaces = MASpace(
                tuple(
                    Box(low=-1.0, high=1.0, shape=(12,))
                    for _ in range(self.agent_num)
                )
            )

    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    self.t = 0
    self.repeated = repeated
    self.max_step = max_step
    self.memory = memory
    self.previous_action = 0
    self.previous_actions = []
    self.ep_rewards = np.zeros(2)

    if payoff is not None:
        payoff = np.array(payoff)
        assert payoff.shape == tuple([agent_num] + [action_num] * agent_num)
        self.payoff = payoff
    if payoff is None:
        self.payoff = np.zeros(tuple([agent_num] + [action_num] * agent_num))

    if self.game_name == "coordination_0_0":
        self.payoff[0] = [[1, -1], [-1, -1]]
        self.payoff[1] = [[1, -1], [-1, -1]]
    elif self.game_name == "coordination_same_action_with_preference":
        self.payoff[0] = [[2, 0], [0, 1]]
        self.payoff[1] = [[1, 0], [0, 2]]
    elif self.game_name == "zero_sum_nash_0_1":
        # Payoff table for a zero-sum game. Nash equilibrium:
        # (agent 1's action = 0, agent 2's action = 1).
        self.payoff[0] = [[5, 2], [-1, 6]]
        self.payoff[1] = [[-5, -2], [1, -6]]
    elif self.game_name == "matching_pennies":
        # Payoff table for the zero-sum matching pennies game.
        self.payoff[0] = [[1, -1], [-1, 1]]
        self.payoff[1] = [[-1, 1], [1, -1]]
    elif self.game_name == "matching_pennies_3":
        self.payoff[0] = [[[1, -1], [-1, 1]], [[1, -1], [-1, 1]]]
        self.payoff[1] = [[[1, -1], [1, -1]], [[-1, 1], [-1, 1]]]
        self.payoff[2] = [[[-1, -1], [1, 1]], [[1, 1], [-1, -1]]]
    elif self.game_name == "prison_lola":
        self.payoff[0] = [[-1, -3], [0, -2]]
        self.payoff[1] = [[-1, 0], [-3, -2]]
    elif self.game_name == "prison":
        self.payoff[0] = [[3, 1], [4, 2]]
        self.payoff[1] = [[3, 4], [1, 2]]
    elif self.game_name == "stag_hunt":
        self.payoff[0] = [[4, 1], [3, 2]]
        self.payoff[1] = [[4, 3], [1, 2]]
    elif self.game_name == "chicken":  # snowdrift
        self.payoff[0] = [[3, 2], [4, 1]]
        self.payoff[1] = [[3, 4], [2, 1]]
    elif self.game_name == "harmony":
        self.payoff[0] = [[4, 3], [2, 1]]
        self.payoff[1] = [[4, 2], [3, 1]]
    elif self.game_name == "wolf_05_05":
        self.payoff[0] = [[0, 3], [1, 2]]
        self.payoff[1] = [[3, 2], [0, 1]]
        # With alpha, beta = 0, 0.9 the Nash equilibrium is (0.5, 0.5).
        # Given the Q tables, maintaining the best response learns a Nash equilibrium.
    elif self.game_name == "climbing":
        self.payoff[0] = [[11, -30, 0], [-30, 7, 6], [0, 0, 5]]
        self.payoff[1] = [[11, -30, 0], [-30, 7, 6], [0, 0, 5]]
    elif self.game_name == "penalty":
        self.payoff[0] = [[10, 0, 0], [0, 2, 0], [0, 0, 10]]
        self.payoff[1] = [[10, 0, 0], [0, 2, 0], [0, 0, 10]]
    elif self.game_name == "rock_paper_scissors":
        self.payoff[0] = [[0, -1, 1], [1, 0, -1], [-1, 1, 0]]
        self.payoff[1] = [[0, 1, -1], [-1, 0, 1], [1, -1, 0]]

    self.rewards = np.zeros((self.agent_num,))
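# Illustrative sketch: a user-supplied payoff tensor must have shape
# [agent_num] + [action_num] * agent_num, matching the assert in __init__ above.
# For a hypothetical 2-agent, 2-action game (values are examples only):
#
#     payoff = [
#         [[1, -1], [-1, 1]],   # agent 0's payoff table
#         [[-1, 1], [1, -1]],   # agent 1's payoff table
#     ]
#     env = MatrixGame("matching_pennies", agent_num=2, action_num=2, payoff=payoff)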
def __init__(
    self, game_name, agent_num, action_num, state_num, payoff=None, transition=None
):
    self.game_name = game_name
    self.agent_num = agent_num
    self.action_num = action_num
    self.state_num = state_num

    game_list = StochasticMatrixGame.get_game_list()

    if self.game_name not in game_list:
        raise EnvironmentNotFound(f"The game {self.game_name} doesn't exist")

    expt_num_agent = game_list[self.game_name]["agent_num"]
    if expt_num_agent != self.agent_num:
        raise WrongNumberOfAgent(
            f"The number of agents required for {self.game_name} is {expt_num_agent}"
        )
    expt_num_action = game_list[self.game_name]["action_num"]
    if expt_num_action != self.action_num:
        raise WrongNumberOfAction(
            f"The number of actions required for {self.game_name} is {expt_num_action}"
        )
    expt_num_state = game_list[self.game_name]["state_num"]
    if expt_num_state != self.state_num:
        raise WrongNumberOfState(
            f"The number of states required for {self.game_name} is {expt_num_state}"
        )

    self.action_spaces = MASpace(
        tuple(Box(low=-1.0, high=1.0, shape=(1,)) for _ in range(self.agent_num))
    )
    self.observation_spaces = MASpace(
        tuple(Discrete(1) for _ in range(self.agent_num))
    )
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    self.t = 0

    if payoff is not None:
        payoff = np.array(payoff)
        assert payoff.shape == tuple(
            [state_num, agent_num] + [action_num] * agent_num
        )
        self.payoff = payoff
    if payoff is None:
        self.payoff = np.zeros(
            tuple([state_num, agent_num] + [action_num] * agent_num)
        )

    if transition is None:
        self.transition = np.zeros(
            tuple([state_num] + [action_num] * agent_num + [state_num])
        )

    if self.game_name == "PollutionTax":
        self.payoff[0][0] = [[4.0, 3.0], [7.0, 6.0]]
        self.payoff[0][1] = [[5.0, 8.0], [4.0, 7.0]]
        self.payoff[1][0] = [[1.0, 0.0], [4.0, 3.0]]
        self.payoff[1][1] = [[2.0, 5.0], [1.0, 4.0]]
        self.transition[0] = [[[1.0, 0.0], [0.0, 1.0]], [[0.0, 1.0], [0.0, 1.0]]]
        self.transition[1] = [[[1.0, 0.0], [0.0, 1.0]], [[0.0, 1.0], [0.0, 1.0]]]
    elif self.game_name == "three_matrix_games":
        self.g1 = [[0.0, 3.0], [2.0, -1.0]]
        self.g2 = [[0.0, 1.0], [4.0, 3.0]]
        self.g = [["g1", 4.0], [5.0, "g2"]]

    self.rewards = np.zeros((self.agent_num,))
    self.state = 0
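# Shape note (illustrative): for "PollutionTax" (state_num=2, agent_num=2,
# action_num=2), self.payoff is zero-initialised with shape
# (state_num, agent_num, action_num, action_num) = (2, 2, 2, 2) and
# self.transition with shape (state_num, action_num, action_num, state_num)
# = (2, 2, 2, 2); the per-state assignments above fill in these tensors.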
def __init__(
    self,
    world,
    reset_callback=None,
    reward_callback=None,
    observation_callback=None,
    info_callback=None,
    done_callback=None,
    shared_viewer=True,
):
    self.world = world
    self.agents = self.world.policy_agents
    # set required vectorized gym env property
    self.n = len(world.policy_agents)
    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback
    # environment parameters
    # if true, the action is an integer 0...N; otherwise it is a one-hot N-dimensional vector
    self.discrete_action_space = True
    # if true, even continuous actions are executed discretely
    self.discrete_action_input = False
    self.force_discrete_action = (
        world.discrete_action if hasattr(world, "discrete_action") else False
    )
    # if true, every agent receives the same (shared) reward
    self.shared_reward = (
        world.collaborative if hasattr(world, "collaborative") else False
    )
    self.time = 0

    # configure spaces
    self.action_space = []
    self.observation_space = []
    obs_shapes = []
    self.agent_num = len(self.agents)
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if self.discrete_action_space:
            u_action_space = spaces.Discrete(world.dim_p * 2 + 1)
        else:
            u_action_space = spaces.Box(
                low=-agent.u_range,
                high=+agent.u_range,
                shape=(world.dim_p,),
                dtype=np.float32,
            )
        if agent.movable:
            total_action_space.append(u_action_space)
        # communication action space
        if self.discrete_action_space:
            c_action_space = spaces.Discrete(world.dim_c)
        else:
            c_action_space = spaces.Box(
                low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32
            )
        if not agent.silent:
            total_action_space.append(c_action_space)
        # total action space
        if len(total_action_space) > 1:
            # if all action spaces are discrete, simplify to a MultiDiscrete action space
            if all(
                isinstance(act_space, spaces.Discrete)
                for act_space in total_action_space
            ):
                act_space = MultiDiscrete(
                    [[0, act_space.n - 1] for act_space in total_action_space]
                )
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
        # observation space
        obs_dim = len(observation_callback(agent, self.world))
        obs_shapes.append((obs_dim,))
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32)
        )
        agent.action.c = np.zeros(self.world.dim_c)

    # simplified spaces for the non-communication game
    self.action_spaces = MASpace(
        tuple(
            Box(low=0.0, high=1.0, shape=(world.dim_p * 2 + 1,))
            for _ in range(self.agent_num)
        )
    )
    self.observation_spaces = MASpace(
        tuple(
            Box(low=-np.inf, high=+np.inf, shape=obs_shape)
            for obs_shape in obs_shapes
        )
    )
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    self.action_range = [0.0, 1.0]

    # rendering
    self.shared_viewer = shared_viewer
    if self.shared_viewer:
        self.viewers = [None]
    else:
        self.viewers = [None] * self.n
    self._reset_render()
def __init__(self, game_name, agent_num, action_range=(-10, 10)):
    self.game_name = game_name
    self.agent_num = agent_num
    self.action_range = action_range

    game_list = DifferentialGame.get_game_list()

    if self.game_name not in game_list:
        raise EnvironmentNotFound(f"The game {self.game_name} doesn't exist")

    expt_num_agent = game_list[self.game_name]["agent_num"]
    if expt_num_agent != self.agent_num:
        raise WrongNumberOfAgent(
            f"The number of agents required for {self.game_name} is {expt_num_agent}"
        )

    self.action_spaces = MASpace(
        tuple(Box(low=-1.0, high=1.0, shape=(1,)) for _ in range(self.agent_num))
    )
    self.observation_spaces = MASpace(
        tuple(Box(low=-1.0, high=1.0, shape=(1,)) for _ in range(self.agent_num))
    )
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    self.t = 0
    self.payoff = {}

    if self.game_name == "zero_sum":
        self.payoff[0] = lambda a1, a2: a1 * a2
        self.payoff[1] = lambda a1, a2: -a1 * a2
    elif self.game_name == "trigonometric":
        self.payoff[0] = lambda a1, a2: np.cos(a2) * a1
        self.payoff[1] = lambda a1, a2: np.sin(a1) * a2
    elif self.game_name == "mataching_pennies":
        self.payoff[0] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
        self.payoff[1] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
    elif self.game_name == "rotational":
        self.payoff[0] = lambda a1, a2: 0.5 * a1 * a1 + 10 * a1 * a2
        self.payoff[1] = lambda a1, a2: 0.5 * a2 * a2 - 10 * a1 * a2
    elif self.game_name == "wolf":

        def V(alpha, beta, payoff):
            u = payoff[(0, 0)] - payoff[(0, 1)] - payoff[(1, 0)] + payoff[(1, 1)]
            return (
                alpha * beta * u
                + alpha * (payoff[(0, 1)] - payoff[(1, 1)])
                + beta * (payoff[(1, 0)] - payoff[(1, 1)])
                + payoff[(1, 1)]
            )

        payoff_0 = np.array([[0, 3], [1, 2]])
        payoff_1 = np.array([[3, 2], [0, 1]])
        self.payoff[0] = lambda a1, a2: V(a1, a2, payoff_0)
        self.payoff[1] = lambda a1, a2: V(a1, a2, payoff_1)
    elif self.game_name == "ma_softq":
        h1 = 0.8
        h2 = 1.0
        s1 = 3.0
        s2 = 1.0
        x1 = -5.0
        x2 = 5.0
        y1 = -5.0
        y2 = 5.0
        c = 10.0

        def max_f(a1, a2):
            f1 = h1 * (-(np.square(a1 - x1) / s1) - (np.square(a2 - y1) / s1))
            f2 = h2 * (-(np.square(a1 - x2) / s2) - (np.square(a2 - y2) / s2)) + c
            return max(f1, f2)

        self.payoff[0] = lambda a1, a2: max_f(a1, a2)
        self.payoff[1] = lambda a1, a2: max_f(a1, a2)
    else:
        raise EnvironmentNotFound(f"The game {self.game_name} doesn't exist")

    self.rewards = np.zeros((self.agent_num,))
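# Illustrative sketch: each self.payoff entry is a per-agent callable over the
# two scalar actions, so rewards can be evaluated directly (values are examples):
#
#     env = DifferentialGame("zero_sum", agent_num=2)
#     r0 = env.payoff[0](0.3, -0.7)   #  a1 * a2 -> -0.21
#     r1 = env.payoff[1](0.3, -0.7)   # -a1 * a2 ->  0.21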
def __init__(
    self,
    n_pursuers,
    n_evaders,
    n_coop=2,
    n_poison=10,
    radius=0.015,
    obstacle_radius=0.2,
    obstacle_loc=np.array([0.5, 0.5]),
    ev_speed=0.01,
    poison_speed=0.01,
    n_sensors=30,
    sensor_range=0.2,
    action_scale=0.01,
    poison_reward=-1.0,
    food_reward=1.0,
    encounter_reward=0.05,
    control_penalty=-0.5,
    reward_mech="global",
    addid=True,
    speed_features=True,
    **kwargs,
):
    self.n_pursuers = n_pursuers
    self.n_evaders = n_evaders
    self.n_coop = n_coop
    self.n_poison = n_poison
    self.obstacle_radius = obstacle_radius
    self.obstacle_loc = obstacle_loc
    self.poison_speed = poison_speed
    self.radius = radius
    self.ev_speed = ev_speed
    self.n_sensors = n_sensors
    self.sensor_range = np.ones(self.n_pursuers) * sensor_range
    self.action_scale = action_scale
    self.poison_reward = poison_reward
    self.food_reward = food_reward
    self.control_penalty = control_penalty
    self.encounter_reward = encounter_reward
    self.n_obstacles = 1
    self._reward_mech = reward_mech
    self._addid = addid
    self._speed_features = speed_features
    self.seed()

    self._pursuers = [
        Archea(
            npu + 1,
            self.radius * 3 / 4,
            self.n_sensors,
            self.sensor_range[npu],
            addid=self._addid,
            speed_features=self._speed_features,
        )
        for npu in range(self.n_pursuers)
    ]
    self._evaders = [
        Archea(
            nev + 1,
            self.radius * 3 / 4,
            self.n_pursuers,
            self.sensor_range.mean() / 2,
        )
        for nev in range(self.n_evaders)
    ]
    self._poisons = [
        Archea(npo + 1, self.radius * 3 / 4, self.n_poison, 0)
        for npo in range(self.n_poison)
    ]

    self.observation_spaces = MASpace(
        tuple(pursuer.observation_space for pursuer in self._pursuers)
    )
    self.action_spaces = MASpace(
        tuple(pursuer.action_space for pursuer in self._pursuers)
    )
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
def __init__(
    self,
    world,
    reset_callback=None,
    reward_callback=None,
    observation_callback=None,
    info_callback=None,
    done_callback=None,
    shared_viewer=True,
):
    self.world = world
    self.world.discrete_action = True
    self.agents = self.world.policy_agents
    # set required vectorized gym env property
    self.n = len(world.policy_agents)
    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback
    # environment parameters
    # if true, the action is an integer 0...N; otherwise it is a one-hot N-dimensional vector
    self.discrete_action_space = True
    # if true, even continuous actions are executed discretely
    self.discrete_action_input = False
    self.force_discrete_action = (
        world.discrete_action if hasattr(self.world, "discrete_action") else False
    )
    # if true, every agent receives the same (shared) reward
    self.shared_reward = (
        world.collaborative if hasattr(self.world, "collaborative") else False
    )
    self.time = 0

    # configure spaces
    self.action_space = []
    self.observation_space = []
    obs_shapes = []
    self.agent_num = len(self.agents)
    # NOTE: the per-agent spaces built in this loop appear to be unused;
    # self.action_spaces (set below) is what is consumed later, not self.action_space.
    for agent in self.agents:
        total_action_space = []
        # physical action space
        if self.discrete_action_space:
            u_action_space = spaces.Discrete((world.dim_p - 1) * 2 + 3)
        else:
            u_action_space = spaces.Box(
                low=-agent.u_range,
                high=+agent.u_range,
                shape=(world.dim_p,),
                dtype=np.float32,
            )
        if agent.movable:
            total_action_space.append(u_action_space)
        # communication action space
        if self.discrete_action_space:
            c_action_space = spaces.Discrete(world.dim_c)
        else:
            c_action_space = spaces.Box(
                low=0.0, high=1.0, shape=(world.dim_c,), dtype=np.float32
            )
        if not agent.silent:
            total_action_space.append(c_action_space)
        # total action space
        if len(total_action_space) > 1:
            # if all action spaces are discrete, simplify to a MultiDiscrete action space
            if all(
                isinstance(act_space, spaces.Discrete)
                for act_space in total_action_space
            ):
                act_space = MultiDiscrete(
                    [[0, act_space.n - 1] for act_space in total_action_space]
                )
            else:
                act_space = spaces.Tuple(total_action_space)
            self.action_space.append(act_space)
        else:
            self.action_space.append(total_action_space[0])
        # observation space
        obs_dim = len(observation_callback(agent, self.world))
        obs_shapes.append((obs_dim,))
        self.observation_space.append(
            spaces.Box(low=-np.inf, high=+np.inf, shape=(obs_dim,), dtype=np.float32)
        )
        agent.action.c = np.zeros(self.world.dim_c)

    # simplified spaces for the non-communication game.
    # The action originally had 5 values (accel, +forcex, -forcex, +forcey, -forcey);
    # two extra components were added: change in rotation angle and shoot.
    self.action_spaces = MASpace(
        tuple(
            Box(low=0.0, high=1.0, shape=((world.dim_p - 1) * 2 + 3,))
            for _ in range(self.agent_num)
        )
    )
    self.observation_spaces = MASpace(
        tuple(
            Box(low=-np.inf, high=+np.inf, shape=obs_shape)
            for obs_shape in obs_shapes
        )
    )
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    self.action_range = [0.0, 1.0]

    # rendering
    self.shared_viewer = shared_viewer
    if self.shared_viewer:
        self.viewers = [None]
    else:
        self.viewers = [None] * self.n

    # sound effects
    mixer.init()
    soundFiles = gym_fortattack.__file__[:-11] + "envs/Game/"
    mixer.music.load(soundFiles + "bullet.mp3")
    self.prevShot, self.shot = False, False  # used for rendering
    self._reset_render()