Example no. 1
    def __init__(self, config=None):
        # Configuration
        self.config = config
        if not self.config:
            self.config = self.DEFAULT_CONFIG.copy()

        # Seeding
        self.np_random = None
        self.seed()

        # Scene
        self.road = None
        self.vehicle = None

        # Spaces
        self.observation = None
        self.define_spaces()
        self.level_agent_num = 2
        self.action_num = 5
        self.num_state = 8
        #self.num_state = (2 * self.level_agent_num + 1) * 5

        self.agent_num = 2
        self.leader_num = 1
        self.follower_num = 1
        self.merge_start_x = 220
        self.merge_end_x = 310
        self.next_put_x = 500
        self.agents = []
        self.train_agents = []

        self.is_vehicles_valid = [False] * self.agent_num

        self.action_spaces = MASpace(
            tuple(Discrete(self.action_num) for _ in range(self.agent_num)))
        self.observation_spaces = MASpace(
            tuple(Discrete(self.num_state) for _ in range(self.agent_num)))
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)

        # Running
        self.time = 0
        self.done = False

        # Rendering
        self.viewer = None
        self.automatic_rendering_callback = None
        self.should_update_rendering = True
        self.rendering_mode = 'human'
        self.enable_auto_render = False
        self.good_merge_flag = False
        self.episode_merge_record = []
        self.episodes_reward_0 = []
        self.episodes_reward_1 = []
        self.episode_target_merge_record = []
        self.sim_max_step = 5
        self.epsilon = 0.3

    def __init__(self, nS, nA, P, isd):
        self.P = P
        self.isd = isd
        self.lastaction = None  # for rendering
        self.nS = nS
        self.nA = nA

        # self.action_space = MASpace(spaces.Discrete(self.nA))
        # self.observation_space = MASpace(spaces.Discrete(self.nS))
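        # Hard-coded multi-agent spaces: 2 agents, each with 5 discrete
        # actions and nS discrete observations.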
        self.action_spaces = MASpace(
            tuple(spaces.Discrete(5) for _ in range(2)))
        self.observation_spaces = MASpace(
            tuple(spaces.Discrete(nS) for _ in range(2)))
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)

        self.seed()
        self.s = categorical_sample(self.isd, self.np_random)
        self.lastaction = None
Example no. 3
    def __init__(self,
                 game_name,
                 agent_num,
                 action_num,
                 payoff=None,
                 repeated=False,
                 max_step=25,
                 memory=0,
                 discrete_action=True,
                 tuple_obs=False):
        self.game_name = game_name
        self.agent_num = agent_num
        self.action_num = action_num
        self.discrete_action = discrete_action
        self.tuple_obs = tuple_obs
        self.num_state = 1

        game_list = MatrixGame.get_game_list()

        if self.game_name not in game_list:
            raise EnvironmentNotFound(
                f"The game {self.game_name} doesn't exist")

        expt_num_agent = game_list[self.game_name]['agent_num']
        expt_num_action = game_list[self.game_name]['action_num']

        if expt_num_agent != self.agent_num:
            raise WrongNumberOfAgent(
                f"The number of agents required for {self.game_name} "
                f"is {expt_num_agent}")

        if expt_num_action != self.action_num:
            raise WrongNumberOfAction(
                f"The number of actions required for {self.game_name} "
                f"is {expt_num_action}")

        self.action_spaces = MASpace(
            tuple(
                Box(low=-1., high=1., shape=(1, ))
                for _ in range(self.agent_num)))
        self.observation_spaces = MASpace(
            tuple(Discrete(1) for _ in range(self.agent_num)))

        if self.discrete_action:
            self.action_spaces = MASpace(
                tuple(Discrete(action_num) for _ in range(self.agent_num)))
            if memory == 0:
                self.observation_spaces = MASpace(
                    tuple(Discrete(1) for _ in range(self.agent_num)))
            elif memory == 1:
                self.observation_spaces = MASpace(
                    tuple(Discrete(5) for _ in range(self.agent_num)))
        else:
            self.action_range = [-1., 1.]
            self.action_spaces = MASpace(
                tuple(
                    Box(low=-1., high=1., shape=(1, ))
                    for _ in range(self.agent_num)))
            if memory == 0:
                self.observation_spaces = MASpace(
                    tuple(Discrete(1) for _ in range(self.agent_num)))
            elif memory == 1:
                self.observation_spaces = MASpace(
                    tuple(
                        Box(low=-1., high=1., shape=(12, ))
                        for _ in range(self.agent_num)))

        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)

        self.t = 0
        self.repeated = repeated
        self.max_step = max_step
        self.memory = memory
        self.previous_action = 0
        self.previous_actions = []
        self.ep_rewards = np.zeros(2)

        if payoff is not None:
            payoff = np.array(payoff)
            assert payoff.shape == tuple([agent_num] +
                                         [action_num] * agent_num)
            self.payoff = payoff
        else:
            self.payoff = np.zeros(
                tuple([agent_num] + [action_num] * agent_num))

        if self.game_name == 'coordination_0_0':
            self.payoff[0] = [[1, -1], [-1, -1]]
            self.payoff[1] = [[1, -1], [-1, -1]]
        elif self.game_name == 'coordination_same_action_with_preference':
            self.payoff[0] = [[40, 0], [80, 20]]
            self.payoff[1] = [[40, 0], [0, 20]]
        elif self.game_name == 'zero_sum_nash_0_1':
            # Payoff tables for a zero-sum game; Nash equilibrium: (agent 1's action=0, agent 2's action=1)
            self.payoff[0] = [[5, 2], [-1, 6]]
            self.payoff[1] = [[-5, -2], [1, -6]]
        elif self.game_name == 'matching_pennies':
            # Payoff tables for a zero-sum game: matching pennies
            self.payoff[0] = [[1, -1], [-1, 1]]
            self.payoff[1] = [[-1, 1], [1, -1]]
        elif self.game_name == 'matching_pennies_3':
            self.payoff[0] = [[[1, -1], [-1, 1]], [[1, -1], [-1, 1]]]
            self.payoff[1] = [[[1, -1], [1, -1]], [[-1, 1], [-1, 1]]]
            self.payoff[2] = [[[-1, -1], [1, 1]], [[1, 1], [-1, -1]]]
        elif self.game_name == 'prison_lola':
            self.payoff[0] = [[-1, -3], [0, -2]]
            self.payoff[1] = [[-1, 0], [-3, -2]]
        elif self.game_name == 'prison':
            self.payoff[0] = [[3, 1], [4, 2]]
            self.payoff[1] = [[3, 4], [1, 2]]
        elif self.game_name == 'stag_hunt':
            self.payoff[0] = [[4, 1], [3, 2]]
            self.payoff[1] = [[4, 3], [1, 2]]
        elif self.game_name == 'chicken':  # snowdrift
            self.payoff[0] = [[3, 2], [4, 1]]
            self.payoff[1] = [[3, 4], [2, 1]]
        elif self.game_name == 'harmony':
            self.payoff[0] = [[4, 3], [2, 1]]
            self.payoff[1] = [[4, 2], [3, 1]]
        elif self.game_name == 'wolf_05_05':
            self.payoff[0] = [[0, 3], [1, 2]]
            self.payoff[1] = [[3, 2], [0, 1]]
            # alpha, beta = 0, 0.9; the Nash equilibrium is (0.5, 0.5)
            # With the Q tables given, maintain the best response and learn a Nash equilibrium.
        elif self.game_name == 'climbing':
            self.payoff[0] = [[20, 0, 0], [30, 10, 0], [0, 0, 5]]
            self.payoff[1] = [[15, 0, 0], [0, 5, 0], [0, 0, 10]]
        elif self.game_name == 'penalty':
            self.payoff[0] = [[15, 10, 0], [10, 10, 0], [0, 0, 30]]
            self.payoff[1] = [[15, 10, 0], [10, 10, 0], [0, 0, 30]]
        elif self.game_name == 'rock_paper_scissors':
            self.payoff[0] = [[0, -1, 1], [1, 0, -1], [-1, 1, 0]]
            self.payoff[1] = [[0, 1, -1], [-1, 0, 1], [1, -1, 0]]

        self.rewards = np.zeros((self.agent_num, ))
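
A minimal usage sketch for the matrix game constructor above, assuming only the signature and payoff tables shown in this excerpt; the import path of MatrixGame is not shown here and is left out.

# Hypothetical usage, relying only on the constructor shown above.
env = MatrixGame('matching_pennies', agent_num=2, action_num=2,
                 repeated=True, max_step=25)
print(env.payoff[0])   # [[1, -1], [-1, 1]] -- agent 0's payoff table
print(env.payoff[1])   # [[-1, 1], [1, -1]] -- agent 1's payoff table
print(env.env_specs)   # MAEnvSpec combining the per-agent observation and action spaces
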
    def __init__(self, game_name, agent_num, action_range=(-10, 10)):
        self.game_name = game_name
        self.agent_num = agent_num
        self.action_range = action_range

        game_list = DifferentialGame.get_game_list()

        if self.game_name not in game_list:
            raise EnvironmentNotFound(f"The game {self.game_name} doesn't exist")

        expt_num_agent = game_list[self.game_name]['agent_num']
        if expt_num_agent != self.agent_num:
            raise WrongNumberOfAgent(
                f"The number of agents required for {self.game_name} "
                f"is {expt_num_agent}")

        self.action_spaces = MASpace(tuple(Box(low=-1., high=1., shape=(1,)) for _ in range(self.agent_num)))
        self.observation_spaces = MASpace(tuple(Box(low=-1., high=1., shape=(1,)) for _ in range(self.agent_num)))
        self.env_specs = MAEnvSpec(self.observation_spaces, self.action_spaces)
        self.t = 0
        self.payoff = {}

        if self.game_name == 'zero_sum':
            self.payoff[0] = lambda a1, a2: a1 * a2
            self.payoff[1] = lambda a1, a2: -a1 * a2
        elif self.game_name == 'trigonometric':
            self.payoff[0] = lambda a1, a2: np.cos(a2) * a1
            self.payoff[1] = lambda a1, a2: np.sin(a1) * a2
        elif self.game_name == 'mataching_pennies':
            self.payoff[0] = lambda a1, a2: (a1-0.5)*(a2-0.5)
            self.payoff[1] = lambda a1, a2: (a1-0.5)*(a2-0.5)
        elif self.game_name == 'rotational':
            self.payoff[0] = lambda a1, a2: 0.5 * a1 * a1 + 10 * a1 * a2
            self.payoff[1] = lambda a1, a2: 0.5 * a2 * a2 - 10 * a1 * a2
        elif self.game_name == 'wolf':
            def V(alpha, beta, payoff):
                u = payoff[(0, 0)] - payoff[(0, 1)] - payoff[(1, 0)] + payoff[(1, 1)]
                return alpha * beta * u + alpha * (payoff[(0, 1)] - payoff[(1, 1)]) + beta * (
                            payoff[(1, 0)] - payoff[(1, 1)]) + payoff[(1, 1)]

            payoff_0 = np.array([[0, 3], [1, 2]])
            payoff_1 = np.array([[3, 2], [0, 1]])

            self.payoff[0] = lambda a1, a2: V(a1, a2, payoff_0)
            self.payoff[1] = lambda a1, a2: V(a1, a2, payoff_1)
        elif self.game_name == 'ma_softq':
            h1 = 0.8
            h2 = 1.
            s1 = 3.
            s2 = 1.
            x1 = -5.
            x2 = 5.
            y1 = -5.
            y2 = 5.
            c = 10.
            def max_f(a1, a2):
                f1 = h1 * (-(np.square(a1 - x1) / s1) - (np.square(a2 - y1) / s1))
                f2 = h2 * (-(np.square(a1 - x2) / s2) - (np.square(a2 - y2) / s2)) + c
                return max(f1, f2)
            self.payoff[0] = lambda a1, a2: max_f(a1, a2)
            self.payoff[1] = lambda a1, a2: max_f(a1, a2)
        elif self.game_name == 'stackelberg':
            c1 = 1000.0
            c2 = 1000.0
            tot = 5000
            self.payoff[0] = lambda a1, a2: a1 * (tot - a1 - a2 - c1)
            self.payoff[1] = lambda a1, a2: a2 * (tot - a1 - a2 - c2)
        else:
            raise EnvironmentNotFound(f"The game {self.game_name} doesn't exist")

        self.rewards = np.zeros((self.agent_num,))
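
A similar hedged sketch for the differential game above; the payoff entries are plain Python callables, so they can be evaluated directly. Again, only the constructor signature shown in this excerpt is assumed.

# Hypothetical usage, relying only on the constructor shown above.
env = DifferentialGame('zero_sum', agent_num=2)
a1, a2 = 0.3, -0.5
print(env.payoff[0](a1, a2))   # a1 * a2  -> -0.15
print(env.payoff[1](a1, a2))   # -a1 * a2 ->  0.15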