def __init__(self, config=None):
    # Configuration
    self.config = config
    if not self.config:
        self.config = self.DEFAULT_CONFIG.copy()
    # Seeding
    self.np_random = None
    self.seed()
    # Scene
    self.road = None
    self.vehicle = None
    # Spaces
    self.observation = None
    self.define_spaces()
    # Multi-agent configuration: one leader and one follower
    self.level_agent_num = 2
    self.action_num = 5
    self.num_state = 8
    # self.num_state = (2 * self.level_agent_num + 1) * 5
    self.agent_num = 2
    self.leader_num = 1
    self.follower_num = 1
    # Merge-zone boundaries and next vehicle placement (x coordinates)
    self.merge_start_x = 220
    self.merge_end_x = 310
    self.next_put_x = 500
    self.agents = []
    self.train_agents = []
    self.is_vehicles_valid = [False] * self.agent_num
    self.action_spaces = MASpace(
        tuple(Discrete(self.action_num) for _ in range(self.agent_num)))
    self.observation_spaces = MASpace(
        tuple(Discrete(self.num_state) for _ in range(self.agent_num)))
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    # Running
    self.time = 0
    self.done = False
    # Rendering
    self.viewer = None
    self.automatic_rendering_callback = None
    self.should_update_rendering = True
    self.rendering_mode = 'human'
    self.enable_auto_render = False
    # Episode statistics
    self.good_merge_flag = False
    self.episode_merge_record = []
    self.episodes_reward_0 = []
    self.episodes_reward_1 = []
    self.episode_target_merge_record = []
    self.sim_max_step = 5
    self.epsilon = 0.3
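# Usage sketch (illustrative): `MergeEnv` is a placeholder name, since this
# excerpt does not show which class the constructor above belongs to.
env = MergeEnv()          # config=None falls back to DEFAULT_CONFIG.copy()

print(env.agent_num)      # 2 agents: 1 leader and 1 follower
print(env.action_num)     # 5 discrete actions per agent
print(env.num_state)      # 8 discrete observation states per agent
print(env.env_specs)      # MAEnvSpec pairing observation_spaces with action_spaces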
def __init__(self, nS, nA, P, isd):
    self.P = P              # tabular transition model
    self.isd = isd          # initial state distribution
    self.lastaction = None  # for rendering
    self.nS = nS            # number of states
    self.nA = nA            # number of actions

    # self.action_space = MASpace(spaces.Discrete(self.nA))
    # self.observation_space = MASpace(spaces.Discrete(self.nS))
    # Note: the action spaces are hardcoded to 5 actions for 2 agents,
    # independent of nA.
    self.action_spaces = MASpace(
        tuple(spaces.Discrete(5) for _ in range(2)))
    self.observation_spaces = MASpace(
        tuple(spaces.Discrete(nS) for _ in range(2)))
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)

    self.seed()
    self.s = categorical_sample(self.isd, self.np_random)
    self.lastaction = None
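# Usage sketch (illustrative): the P/isd convention below follows the Gym
# DiscreteEnv style suggested by the field names and by categorical_sample --
# P[state][action] = [(probability, next_state, reward, done), ...] -- but it
# is an assumption, not confirmed by this excerpt. `DiscreteMAEnv` is a
# placeholder for whatever class defines the constructor above.
import numpy as np

nS, nA = 2, 5
P = {
    s: {a: [(1.0, (s + 1) % nS, 1.0 if s == 0 else 0.0, False)]
        for a in range(nA)}
    for s in range(nS)
}
isd = np.array([0.5, 0.5])  # initial state distribution over the nS states

env = DiscreteMAEnv(nS, nA, P, isd)
print(env.s)  # initial state index sampled from isd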
def __init__(self,
             game_name,
             agent_num,
             action_num,
             payoff=None,
             repeated=False,
             max_step=25,
             memory=0,
             discrete_action=True,
             tuple_obs=False):
    self.game_name = game_name
    self.agent_num = agent_num
    self.action_num = action_num
    self.discrete_action = discrete_action
    self.tuple_obs = tuple_obs
    self.num_state = 1

    game_list = MatrixGame.get_game_list()
    if self.game_name not in game_list:
        raise EnvironmentNotFound(
            f"The game {self.game_name} doesn't exist")

    expt_num_agent = game_list[self.game_name]['agent_num']
    expt_num_action = game_list[self.game_name]['action_num']
    if expt_num_agent != self.agent_num:
        raise WrongNumberOfAgent(
            f"The number of agents required for "
            f"{self.game_name} is {expt_num_agent}")
    if expt_num_action != self.action_num:
        raise WrongNumberOfAction(
            f"The number of actions required for "
            f"{self.game_name} is {expt_num_action}")

    # Default spaces; overridden below depending on discrete_action and memory.
    self.action_spaces = MASpace(
        tuple(Box(low=-1., high=1., shape=(1,)) for _ in range(self.agent_num)))
    self.observation_spaces = MASpace(
        tuple(Discrete(1) for _ in range(self.agent_num)))

    if self.discrete_action:
        self.action_spaces = MASpace(
            tuple(Discrete(action_num) for _ in range(self.agent_num)))
        if memory == 0:
            self.observation_spaces = MASpace(
                tuple(Discrete(1) for _ in range(self.agent_num)))
        elif memory == 1:
            self.observation_spaces = MASpace(
                tuple(Discrete(5) for _ in range(self.agent_num)))
    else:
        self.action_range = [-1., 1.]
        self.action_spaces = MASpace(
            tuple(Box(low=-1., high=1., shape=(1,)) for _ in range(self.agent_num)))
        if memory == 0:
            self.observation_spaces = MASpace(
                tuple(Discrete(1) for _ in range(self.agent_num)))
        elif memory == 1:
            self.observation_spaces = MASpace(
                tuple(Box(low=-1., high=1., shape=(12,)) for _ in range(self.agent_num)))

    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    self.t = 0
    self.repeated = repeated
    self.max_step = max_step
    self.memory = memory
    self.previous_action = 0
    self.previous_actions = []
    self.ep_rewards = np.zeros(2)

    if payoff is not None:
        payoff = np.array(payoff)
        assert payoff.shape == tuple([agent_num] + [action_num] * agent_num)
        self.payoff = payoff
    if payoff is None:
        self.payoff = np.zeros(
            tuple([agent_num] + [action_num] * agent_num))

    if self.game_name == 'coordination_0_0':
        self.payoff[0] = [[1, -1], [-1, -1]]
        self.payoff[1] = [[1, -1], [-1, -1]]
    elif self.game_name == 'coordination_same_action_with_preference':
        self.payoff[0] = [[40, 0], [80, 20]]
        self.payoff[1] = [[40, 0], [0, 20]]
    elif self.game_name == 'zero_sum_nash_0_1':
        # Zero-sum game payoff table; Nash equilibrium:
        # (agent 1's action = 0, agent 2's action = 1).
        self.payoff[0] = [[5, 2], [-1, 6]]
        self.payoff[1] = [[-5, -2], [1, -6]]
    elif self.game_name == 'matching_pennies':
        # Zero-sum game payoff table: matching pennies.
        self.payoff[0] = [[1, -1], [-1, 1]]
        self.payoff[1] = [[-1, 1], [1, -1]]
    elif self.game_name == 'matching_pennies_3':
        self.payoff[0] = [[[1, -1], [-1, 1]], [[1, -1], [-1, 1]]]
        self.payoff[1] = [[[1, -1], [1, -1]], [[-1, 1], [-1, 1]]]
        self.payoff[2] = [[[-1, -1], [1, 1]], [[1, 1], [-1, -1]]]
    elif self.game_name == 'prison_lola':
        self.payoff[0] = [[-1, -3], [0, -2]]
        self.payoff[1] = [[-1, 0], [-3, -2]]
    elif self.game_name == 'prison':
        self.payoff[0] = [[3, 1], [4, 2]]
        self.payoff[1] = [[3, 4], [1, 2]]
    elif self.game_name == 'stag_hunt':
        self.payoff[0] = [[4, 1], [3, 2]]
        self.payoff[1] = [[4, 3], [1, 2]]
    elif self.game_name == 'chicken':  # snowdrift
        self.payoff[0] = [[3, 2], [4, 1]]
        self.payoff[1] = [[3, 4], [2, 1]]
    elif self.game_name == 'harmony':
        self.payoff[0] = [[4, 3], [2, 1]]
        self.payoff[1] = [[4, 2], [3, 1]]
    elif self.game_name == 'wolf_05_05':
        self.payoff[0] = [[0, 3], [1, 2]]
        self.payoff[1] = [[3, 2], [0, 1]]
        # With alpha, beta = 0, 0.9 the Nash equilibrium is (0.5, 0.5).
        # Given the Q tables, maintaining the best response learns a Nash equilibrium.
    elif self.game_name == 'climbing':
        self.payoff[0] = [[20, 0, 0], [30, 10, 0], [0, 0, 5]]
        self.payoff[1] = [[15, 0, 0], [0, 5, 0], [0, 0, 10]]
    elif self.game_name == 'penalty':
        self.payoff[0] = [[15, 10, 0], [10, 10, 0], [0, 0, 30]]
        self.payoff[1] = [[15, 10, 0], [10, 10, 0], [0, 0, 30]]
    elif self.game_name == 'rock_paper_scissors':
        self.payoff[0] = [[0, -1, 1], [1, 0, -1], [-1, 1, 0]]
        self.payoff[1] = [[0, 1, -1], [-1, 0, 1], [1, -1, 0]]

    self.rewards = np.zeros((self.agent_num,))
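# Usage sketch (illustrative): assumes MatrixGame is importable from this
# module and that 'prison' is registered in get_game_list() for 2 agents and
# 2 actions, as the branch above suggests.
game = MatrixGame('prison', agent_num=2, action_num=2)

# The payoff tensor has shape (agent_num, action_num, ..., action_num); for a
# two-player, two-action game that is (2, 2, 2), and payoff[i] holds player
# i's 2x2 payoff matrix indexed by the joint action (read here as
# [player 1's action][player 2's action]).
assert game.payoff.shape == (2, 2, 2)
print(game.payoff[0][1][0])  # 4.0 for 'prison': player 1's temptation payoff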
def __init__(self, game_name, agent_num, action_range=(-10, 10)):
    self.game_name = game_name
    self.agent_num = agent_num
    self.action_range = action_range

    game_list = DifferentialGame.get_game_list()
    if self.game_name not in game_list:
        raise EnvironmentNotFound(f"The game {self.game_name} doesn't exist")

    expt_num_agent = game_list[self.game_name]['agent_num']
    if expt_num_agent != self.agent_num:
        raise WrongNumberOfAgent(
            f"The number of agents required for "
            f"{self.game_name} is {expt_num_agent}")

    self.action_spaces = MASpace(
        tuple(Box(low=-1., high=1., shape=(1,)) for _ in range(self.agent_num)))
    self.observation_spaces = MASpace(
        tuple(Box(low=-1., high=1., shape=(1,)) for _ in range(self.agent_num)))
    self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
    self.t = 0

    # Each payoff entry is a callable mapping the two scalar actions to a reward.
    self.payoff = {}
    if self.game_name == 'zero_sum':
        self.payoff[0] = lambda a1, a2: a1 * a2
        self.payoff[1] = lambda a1, a2: -a1 * a2
    elif self.game_name == 'trigonometric':
        self.payoff[0] = lambda a1, a2: np.cos(a2) * a1
        self.payoff[1] = lambda a1, a2: np.sin(a1) * a2
    elif self.game_name == 'mataching_pennies':
        self.payoff[0] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
        self.payoff[1] = lambda a1, a2: (a1 - 0.5) * (a2 - 0.5)
    elif self.game_name == 'rotational':
        self.payoff[0] = lambda a1, a2: 0.5 * a1 * a1 + 10 * a1 * a2
        self.payoff[1] = lambda a1, a2: 0.5 * a2 * a2 - 10 * a1 * a2
    elif self.game_name == 'wolf':
        # Expected payoff of a 2x2 matrix game under mixed strategies
        # (alpha, beta) for the two players.
        def V(alpha, beta, payoff):
            u = payoff[(0, 0)] - payoff[(0, 1)] - payoff[(1, 0)] + payoff[(1, 1)]
            return alpha * beta * u \
                + alpha * (payoff[(0, 1)] - payoff[(1, 1)]) \
                + beta * (payoff[(1, 0)] - payoff[(1, 1)]) \
                + payoff[(1, 1)]

        payoff_0 = np.array([[0, 3], [1, 2]])
        payoff_1 = np.array([[3, 2], [0, 1]])
        self.payoff[0] = lambda a1, a2: V(a1, a2, payoff_0)
        self.payoff[1] = lambda a1, a2: V(a1, a2, payoff_1)
    elif self.game_name == 'ma_softq':
        h1 = 0.8
        h2 = 1.
        s1 = 3.
        s2 = 1.
        x1 = -5.
        x2 = 5.
        y1 = -5.
        y2 = 5.
        c = 10.

        # Shared reward: the maximum of two Gaussian-like modes centered at
        # (x1, y1) and (x2, y2).
        def max_f(a1, a2):
            f1 = h1 * (-(np.square(a1 - x1) / s1) - (np.square(a2 - y1) / s1))
            f2 = h2 * (-(np.square(a1 - x2) / s2) - (np.square(a2 - y2) / s2)) + c
            return max(f1, f2)

        self.payoff[0] = lambda a1, a2: max_f(a1, a2)
        self.payoff[1] = lambda a1, a2: max_f(a1, a2)
    elif self.game_name == 'stackelberg':
        c1 = 1000.0
        c2 = 1000.0
        tot = 5000
        self.payoff[0] = lambda a1, a2: a1 * (tot - a1 - a2 - c1)
        self.payoff[1] = lambda a1, a2: a2 * (tot - a1 - a2 - c2)
    else:
        raise EnvironmentNotFound(f"The game {self.game_name} doesn't exist")

    self.rewards = np.zeros((self.agent_num,))
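# Usage sketch (illustrative): assumes DifferentialGame is importable from
# this module and that 'zero_sum' is registered in get_game_list() with
# agent_num == 2.
game = DifferentialGame('zero_sum', agent_num=2)

# Rewards are obtained by calling the payoff functions on the scalar actions.
a1, a2 = 0.3, -0.7
r0 = game.payoff[0](a1, a2)  # a1 * a2 = -0.21
r1 = game.payoff[1](a1, a2)  # -a1 * a2 = 0.21
print(r0 + r1)               # 0.0: the two rewards always sum to zero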