def basic_different_dec_cardinality() -> MACID:
    """A basic MACIM in which the two agents' decision nodes have different
    cardinalities (D1 is binary, D2 is ternary). It has one subgame perfect NE.
    """
    macid = MACID(
        [('D1', 'D2'), ('D1', 'U1'), ('D1', 'U2'), ('D2', 'U2'), ('D2', 'U1')],
        {0: {'D': ['D1'], 'U': ['U1']},
         1: {'D': ['D2'], 'U': ['U2']}})

    # Payoff matrices: rows are indexed by D1's action, columns by D2's action.
    payoffs = {
        'U1': np.array([[3, 1, 0], [1, 2, 3]]),
        'U2': np.array([[1, 2, 1], [1, 0, 3]]),
    }

    macid.add_cpds(
        DecisionDomain('D1', [0, 1]),
        DecisionDomain('D2', [0, 1, 2]),
        FunctionCPD('U1', lambda d1, d2: payoffs['U1'][d1, d2], evidence=['D1', 'D2']),
        FunctionCPD('U2', lambda d1, d2: payoffs['U2'][d1, d2], evidence=['D1', 'D2']),
    )
    return macid
def get_sequential_cid() -> CID:
    """Build a two-decision sequential CID.

    This CID is a subtle case of sufficient recall: the strategy at D1
    influences the expected utility of D2, yet D2 can still be chosen without
    observing D1, because D1 does not influence any utility node descending
    from D2.
    """
    cid = CID(
        [('S1', 'D1'), ('D1', 'U1'), ('S1', 'U1'),
         ('D1', 'S2'), ('S2', 'D2'), ('D2', 'U2'), ('S2', 'U2')],
        decision_nodes=['D1', 'D2'],
        utility_nodes=['U1', 'U2'])

    cpds = [
        UniformRandomCPD('S1', [0, 1]),
        DecisionDomain('D1', [0, 1]),
        # U1 rewards D1 for matching the state S1.
        FunctionCPD('U1', lambda s1, d1: int(s1 == d1), evidence=['S1', 'D1']),
        # S2 simply copies D1's value.
        FunctionCPD('S2', lambda d1: d1, evidence=['D1']),
        DecisionDomain('D2', [0, 1]),
        # U2 rewards D2 for matching the state S2.
        FunctionCPD('U2', lambda s2, d2: int(s2 == d2), evidence=['S2', 'D2']),
    ]
    cid.add_cpds(*cpds)
    return cid
def modified_taxi_competition() -> MACID:
    """The taxi competition example with modified payoffs so that there is a
    tie-break: if taxi 1 chooses to stop in front of the expensive hotel,
    taxi 2 is indifferent between its choices. There are now two SPNE.

                              D1
        +----------+----------+----------+
        | taxi 1   | expensive|  cheap   |
        +----------+----------+----------+
        |expensive |     2    |    3     |
    D2  +----------+----------+----------+
        | cheap    |     5    |    1     |
        +----------+----------+----------+

                              D1
        +----------+----------+----------+
        | taxi 2   | expensive|  cheap   |
        +----------+----------+----------+
        |expensive |     2    |    5     |
    D2  +----------+----------+----------+
        | cheap    |     3    |    5     |
        +----------+----------+----------+
    """
    macid = MACID(
        [('D1', 'D2'), ('D1', 'U1'), ('D1', 'U2'), ('D2', 'U2'), ('D2', 'U1')],
        {1: {'D': ['D1'], 'U': ['U1']},
         2: {'D': ['D2'], 'U': ['U2']}})

    choices = ['e', 'c']  # expensive or cheap hotel

    # Rows of each payoff matrix are indexed by D2's choice, columns by D1's,
    # mirroring the tables above.
    payoff1 = np.array([[2, 3], [5, 1]])
    payoff2 = np.array([[2, 5], [3, 5]])

    def lookup(matrix: np.ndarray, d1: str, d2: str) -> int:
        return matrix[choices.index(d2), choices.index(d1)]

    macid.add_cpds(
        DecisionDomain('D1', choices),
        DecisionDomain('D2', choices),
        FunctionCPD('U1', lambda d1, d2: lookup(payoff1, d1, d2), evidence=['D1', 'D2']),
        FunctionCPD('U2', lambda d1, d2: lookup(payoff2, d1, d2), evidence=['D1', 'D2']))
    return macid
def taxi_competition() -> MACID:
    """A MACIM for the "Taxi Competition" example introduced in
    "Equilibrium Refinements for Multi-Agent Influence Diagrams: Theory and
    Practice" by Hammond, Fox, Everitt, Abate & Wooldridge, 2021:

                              D1
        +----------+----------+----------+
        | taxi 1   | expensive|  cheap   |
        +----------+----------+----------+
        |expensive |     2    |    3     |
    D2  +----------+----------+----------+
        | cheap    |     5    |    1     |
        +----------+----------+----------+

                              D1
        +----------+----------+----------+
        | taxi 2   | expensive|  cheap   |
        +----------+----------+----------+
        |expensive |     2    |    5     |
    D2  +----------+----------+----------+
        | cheap    |     3    |    1     |
        +----------+----------+----------+

    There are 3 pure strategy NE and 1 pure SPE.
    """
    macid = MACID(
        [('D1', 'D2'), ('D1', 'U1'), ('D1', 'U2'), ('D2', 'U2'), ('D2', 'U1')],
        {1: {'D': ['D1'], 'U': ['U1']},
         2: {'D': ['D2'], 'U': ['U2']}})

    choices = ['e', 'c']  # expensive or cheap hotel

    # Rows of each payoff matrix are indexed by D2's choice, columns by D1's,
    # mirroring the tables above.
    payoff1 = np.array([[2, 3], [5, 1]])
    payoff2 = np.array([[2, 5], [3, 1]])

    def lookup(matrix: np.ndarray, d1: str, d2: str) -> int:
        return matrix[choices.index(d2), choices.index(d1)]

    macid.add_cpds(
        DecisionDomain('D1', choices),
        DecisionDomain('D2', choices),
        FunctionCPD('U1', lambda d1, d2: lookup(payoff1, d1, d2), evidence=['D1', 'D2']),
        FunctionCPD('U2', lambda d1, d2: lookup(payoff2, d1, d2), evidence=['D1', 'D2']))
    return macid
def battle_of_the_sexes() -> MACID:
    """A MACIM representing the battle of the sexes game (also known as Bach
    or Stravinsky). It is a simultaneous symmetric two-player game with payoffs
    corresponding to the following normal form game - the row player is Female
    and the column player is Male:

        +----------+----------+----------+
        |          |  Opera   | Football |
        +----------+----------+----------+
        |  Opera   |  3, 2    |  0, 0    |
        +----------+----------+----------+
        | Football |  0, 0    |  2, 3    |
        +----------+----------+----------+

    This game has two pure NE, in which both players attend the same event:
    (Opera, Opera) and (Football, Football).
    """
    # Agent 'F' (Female) controls D_F and receives U_F; agent 'M' (Male)
    # controls D_M and receives U_M. (The original assignment had the two
    # agents' decision and utility nodes swapped.)
    macid = MACID([('D_F', 'U_F'), ('D_F', 'U_M'), ('D_M', 'U_M'), ('D_M', 'U_F')],
                  {'F': {'D': ['D_F'], 'U': ['U_F']},
                   'M': {'D': ['D_M'], 'U': ['U_M']}})
    d_f_domain = ['O', 'F']
    d_m_domain = ['O', 'F']
    cpd_d_f = DecisionDomain('D_F', d_f_domain)
    cpd_d_m = DecisionDomain('D_M', d_m_domain)
    # Rows are indexed by the Female's choice, columns by the Male's.
    agent_f_payoff = np.array([[3, 0], [0, 2]])
    agent_m_payoff = np.array([[2, 0], [0, 3]])
    cpd_u_f = FunctionCPD(
        'U_F',
        lambda d_f, d_m: agent_f_payoff[d_f_domain.index(d_f), d_m_domain.index(d_m)],
        evidence=['D_F', 'D_M'])
    cpd_u_m = FunctionCPD(
        'U_M',
        lambda d_f, d_m: agent_m_payoff[d_f_domain.index(d_f), d_m_domain.index(d_m)],
        evidence=['D_F', 'D_M'])
    macid.add_cpds(cpd_d_f, cpd_d_m, cpd_u_f, cpd_u_m)
    return macid
def two_agents_three_actions() -> MACID:
    """A MACIM representing a game in which two players must simultaneously
    decide between three different actions - the row player is agent 1 and the
    column player is agent 2 - with the following normal-form payoffs:

        +----------+----------+----------+----------+
        |          |  L       |  C       |  R       |
        +----------+----------+----------+----------+
        |  T       | 4, 3     | 5, 1     | 6, 2     |
        +----------+----------+----------+----------+
        |  M       | 2, 1     | 8, 4     | 3, 6     |
        +----------+----------+----------+----------+
        |  B       | 3, 0     | 9, 6     | 2, 8     |
        +----------+----------+----------+----------+

    The game has one pure NE: (T, L).
    """
    macid = MACID(
        [('D1', 'U1'), ('D1', 'U2'), ('D2', 'U2'), ('D2', 'U1')],
        {1: {'D': ['D1'], 'U': ['U1']},
         2: {'D': ['D2'], 'U': ['U2']}})

    rows = ['T', 'M', 'B']  # agent 1's actions
    cols = ['L', 'C', 'R']  # agent 2's actions

    payoff1 = np.array([[4, 5, 6], [2, 8, 3], [3, 9, 2]])
    payoff2 = np.array([[3, 1, 2], [1, 4, 6], [0, 6, 8]])

    macid.add_cpds(
        DecisionDomain('D1', rows),
        DecisionDomain('D2', cols),
        FunctionCPD('U1', lambda d1, d2: payoff1[rows.index(d1), cols.index(d2)],
                    evidence=['D1', 'D2']),
        FunctionCPD('U2', lambda d1, d2: payoff2[rows.index(d1), cols.index(d2)],
                    evidence=['D1', 'D2']))
    return macid
def get_5node_cid_with_scaled_utility() -> CID:
    """A five-node CID with two utility nodes whose rewards are scaled
    differently: matching S1 is worth 10 via U1, matching S2 only 2 via U2."""
    cid = CID(
        [('S1', 'D'), ('S1', 'U1'), ('S2', 'D'), ('S2', 'U2'), ('D', 'U1'), ('D', 'U2')],
        decision_nodes=['D'],
        utility_nodes=['U1', 'U2'])
    cid.add_cpds(
        DecisionDomain('D', [0, 1]),
        UniformRandomCPD('S1', [0, 1]),
        UniformRandomCPD('S2', [0, 1]),
        FunctionCPD('U1', lambda s1, d: 10 * int(s1 == d), evidence=['S1', 'D']),
        FunctionCPD('U2', lambda s2, d: 2 * int(s2 == d), evidence=['S2', 'D']),
    )
    return cid
def get_2dec_cid() -> CID:
    """A CID with two sequential decisions D1 and D2 and one utility node.

    S2 records whether D1 matched the initial state S1, and U rewards D2 for
    matching S2.
    """
    cid = CID(
        [('S1', 'S2'), ('S1', 'D1'), ('D1', 'S2'), ('S2', 'U'), ('S2', 'D2'), ('D2', 'U')],
        decision_nodes=['D1', 'D2'],
        utility_nodes=['U'])
    cpd_s1 = UniformRandomCPD('S1', [0, 1])
    cpd_d1 = DecisionDomain('D1', [0, 1])
    cpd_d2 = DecisionDomain('D2', [0, 1])
    # The first lambda argument is the value of S1 (the first evidence node).
    # It was previously named `s2`, which wrongly suggested S2 depends on itself.
    cpd_s2 = FunctionCPD('S2', lambda s1, d1: int(s1 == d1), evidence=['S1', 'D1'])
    cpd_u = FunctionCPD('U', lambda s2, d2: int(s2 == d2), evidence=['S2', 'D2'])
    cid.add_cpds(cpd_s1, cpd_d1, cpd_s2, cpd_d2, cpd_u)
    return cid
def get_insufficient_recall_cid() -> CID:
    """A two-decision CID with insufficient recall: neither decision observes
    the other, yet the utility U = A * B depends on both."""
    cid = CID([('A', 'U'), ('B', 'U')], decision_nodes=['A', 'B'], utility_nodes=['U'])
    cpds = (
        DecisionDomain('A', [0, 1]),
        DecisionDomain('B', [0, 1]),
        FunctionCPD('U', lambda a, b: a * b, evidence=['A', 'B']),
    )
    cid.add_cpds(*cpds)
    return cid
def matching_pennies() -> MACID:
    """A MACIM representation of the matching pennies game: a two-player game
    with payoffs corresponding to the following normal form game - the row
    player is agent 1 and the column player is agent 2:

        +----------+----------+----------+
        |          |  Heads   |  Tails   |
        +----------+----------+----------+
        |  Heads   | +1, -1   | -1, +1   |
        +----------+----------+----------+
        |  Tails   | -1, +1   | +1, -1   |
        +----------+----------+----------+

    This game has no pure NE, but has a mixed NE in which each player chooses
    Heads or Tails with equal probability.
    """
    macid = MACID(
        [('D1', 'U1'), ('D1', 'U2'), ('D2', 'U2'), ('D2', 'U1')],
        {1: {'D': ['D1'], 'U': ['U1']},
         2: {'D': ['D2'], 'U': ['U2']}})

    sides = ['H', 'T']
    payoff1 = np.array([[1, -1], [-1, 1]])
    payoff2 = -payoff1  # the payoffs sum to zero, so agent 2's are the negation

    macid.add_cpds(
        DecisionDomain('D1', sides),
        DecisionDomain('D2', sides),
        FunctionCPD('U1', lambda d1, d2: payoff1[sides.index(d1), sides.index(d2)],
                    evidence=['D1', 'D2']),
        FunctionCPD('U2', lambda d1, d2: payoff2[sides.index(d1), sides.index(d2)],
                    evidence=['D1', 'D2']))
    return macid
def prisoners_dilemma() -> MACID:
    """A MACIM representation of the canonical prisoner's dilemma: a
    simultaneous symmetric two-player game with payoffs corresponding to the
    following normal form game - the row player is agent 1 and the column
    player is agent 2:

        +----------+----------+----------+
        |          |Cooperate | Defect   |
        +----------+----------+----------+
        |Cooperate |  -1, -1  | -3, 0    |
        +----------+----------+----------+
        | Defect   |  0, -3   | -2, -2   |
        +----------+----------+----------+

    This game has one pure NE: (defect, defect).
    """
    macid = MACID(
        [('D1', 'U1'), ('D1', 'U2'), ('D2', 'U2'), ('D2', 'U1')],
        {1: {'D': ['D1'], 'U': ['U1']},
         2: {'D': ['D2'], 'U': ['U2']}})

    actions = ['c', 'd']  # cooperate or defect
    payoff1 = np.array([[-1, -3], [0, -2]])
    payoff2 = payoff1.T  # the game is symmetric

    macid.add_cpds(
        DecisionDomain('D1', actions),
        DecisionDomain('D2', actions),
        FunctionCPD('U1', lambda d1, d2: payoff1[actions.index(d1), actions.index(d2)],
                    evidence=['D1', 'D2']),
        FunctionCPD('U2', lambda d1, d2: payoff2[actions.index(d1), actions.index(d2)],
                    evidence=['D1', 'D2']))
    return macid
def get_3node_cid() -> CID:
    """A three-node CID: the decision D observes the state S, and the utility
    U rewards D for matching S."""
    cid = CID([('S', 'D'), ('S', 'U'), ('D', 'U')], decision_nodes=['D'], utility_nodes=['U'])
    cid.add_cpds(
        UniformRandomCPD('S', [0, 1]),
        DecisionDomain('D', [0, 1]),
        FunctionCPD('U', lambda s, d: int(s == d), evidence=['S', 'D']),
    )
    return cid
def intervene(self, intervention: Dict["str", "Any"]) -> None:
    """Given a dictionary of interventions, replace the CPDs for the relevant nodes.

    Soft interventions can be achieved by using add_cpds directly.

    Parameters
    ----------
    intervention: dict mapping a node name to the value it is clamped to.
    """
    def make_constant_fn(value: Any) -> Any:
        # Freeze `value` in its own scope. A plain `lambda *x: value` inside
        # the loop would capture the loop variable by reference, so every
        # (lazily-evaluated) CPD would return the *last* value in the dict.
        return lambda *x: value

    for variable, value in intervention.items():
        cpd = FunctionCPD(variable, make_constant_fn(value),
                          evidence=self.get_parents(variable))
        self.add_cpds(cpd)
def impute_conditional_expectation_decision(self, d: str, y: str) -> None:
    """Imputes a policy for d equal to the expectation of y conditional on
    d's parents."""
    parents = self.get_parents(d)
    # Work on a snapshot so the imputed policy is computed from the model as
    # it is now, rather than from any later modifications.
    frozen = self.copy()

    @lru_cache(maxsize=1000)
    def cond_exp_policy(*parent_values: tuple) -> float:
        context = dict(zip(parents, parent_values))
        return frozen.expected_value([y], context)[0]

    self.add_cpds(FunctionCPD(d, cond_exp_policy, parents, label="cond_exp({})".format(y)))
def two_agent_one_pne() -> MACID:
    """A simultaneous two-player MACIM whose parameterisation corresponds to
    the following normal form game - the row player is agent 1 and the column
    player is agent 2:

        +----------+----------+----------+
        |          | Act(0)   | Act(1)   |
        +----------+----------+----------+
        | Act(0)   | 1, 2     | 3, 0     |
        +----------+----------+----------+
        | Act(1)   | 0, 3     | 2, 2     |
        +----------+----------+----------+
    """
    macid = MACID(
        [('D1', 'U1'), ('D1', 'U2'), ('D2', 'U2'), ('D2', 'U1')],
        {1: {'D': ['D1'], 'U': ['U1']},
         2: {'D': ['D2'], 'U': ['U2']}})

    # The decisions take integer values, so they index the matrices directly.
    payoff1 = np.array([[1, 3], [0, 2]])
    payoff2 = np.array([[2, 0], [3, 2]])

    macid.add_cpds(
        DecisionDomain('D1', [0, 1]),
        DecisionDomain('D2', [0, 1]),
        FunctionCPD('U1', lambda d1, d2: payoff1[d1, d2], evidence=['D1', 'D2']),
        FunctionCPD('U2', lambda d1, d2: payoff2[d1, d2], evidence=['D1', 'D2']))
    return macid
def test_introduced_total_effect(self) -> None:
    """Check introduced_total_effect on the introduced-bias CID and on three
    variants with one structural dependency removed."""
    def effect_with_cond_exp_policy(modified_cpd=None) -> float:
        # Build the introduced-bias CID (optionally overriding one CPD),
        # impute the conditional-expectation policy for D, and return the
        # introduced total effect of A on Y via D.
        cid = get_introduced_bias()
        cid.impute_random_policy()
        if modified_cpd is not None:
            cid.add_cpds(modified_cpd)
        cid.impute_conditional_expectation_decision('D', 'Y')
        return introduced_total_effect(cid, 'A', 'D', 'Y', 0, 1)

    # Baseline model under a random policy: the effect is exactly -0.5.
    cid = get_introduced_bias()
    cid.impute_random_policy()
    self.assertEqual(introduced_total_effect(cid, 'A', 'D', 'Y', 0, 1), -0.5)

    # Baseline model with the conditional-expectation policy.
    self.assertAlmostEqual(effect_with_cond_exp_policy(), 0.3333, 2)

    # Modified model where X doesn't depend on Z.
    self.assertAlmostEqual(
        effect_with_cond_exp_policy(FunctionCPD('X', lambda a, z: a, evidence=['A', 'Z'])),
        0, 2)

    # Modified model where Y doesn't depend on Z.
    self.assertAlmostEqual(
        effect_with_cond_exp_policy(FunctionCPD('Y', lambda x, z: x, evidence=['X', 'Z'])),
        0, 2)

    # Modified model where Y doesn't depend on X.
    self.assertAlmostEqual(
        effect_with_cond_exp_policy(FunctionCPD('Y', lambda x, z: z, evidence=['X', 'Z'])),
        0.333, 2)
def get_introduced_bias() -> CID:
    """A CID where X = A * Z is an observed function of A and a latent Z,
    Y = X + Z is the target, and the utility U = -(D - Y)^2 penalises the
    squared error of the decision D relative to Y."""
    edges = [
        ('A', 'X'),  # defining the graph's nodes and edges
        ('Z', 'X'),
        ('Z', 'Y'),
        ('X', 'D'),
        ('X', 'Y'),
        ('D', 'U'),
        ('Y', 'U'),
    ]
    cid = CID(edges, decision_nodes=['D'], utility_nodes=['U'])
    cid.add_cpds(
        UniformRandomCPD('A', [0, 1]),
        UniformRandomCPD('Z', [0, 1]),
        FunctionCPD('X', lambda a, z: a * z, evidence=['A', 'Z']),
        DecisionDomain('D', [0, 1]),
        FunctionCPD('Y', lambda x, z: x + z, evidence=['X', 'Z']),
        FunctionCPD('U', lambda d, y: -(d - y)**2, evidence=['D', 'Y']),
    )
    return cid
def impute_optimal_decision(self, d: str) -> None:
    """Impute an optimal policy to the given decision node.

    The policy picks, for each decision context, the action maximising the
    summed expected value of the deciding agent's utility nodes that descend
    from d.
    """
    self.impute_random_decision(d)
    card = self.get_cardinality(d)
    parents = self.get_parents(d)
    idx2name = self.get_cpds(d).no_to_name[d]
    state_names = self.get_cpds(d).state_names
    utility_nodes = self.utility_nodes_agent[self.whose_node[d]]
    descendant_utility_nodes = list(set(utility_nodes).intersection(nx.descendants(self, d)))
    new = self.copy()  # this "freezes" the policy so it doesn't adapt to future interventions

    @lru_cache(maxsize=1000)
    def opt_policy(*pv: tuple) -> Any:
        nonlocal descendant_utility_nodes
        context: Dict[str, Any] = {p: pv[i] for i, p in enumerate(parents)}
        eu = []
        for d_idx in range(card):
            context[d] = d_idx  # TODO should this be idx2name[d_idx]?
            # Sum the agent's utilities. expected_value returns a list (one
            # entry per utility node); appending the raw list and calling
            # np.argmax on the resulting 2-D array would flatten it and could
            # return an index outside range(card) when there are several
            # utility nodes.
            eu.append(sum(new.expected_value(descendant_utility_nodes, context)))
        return idx2name[np.argmax(eu)]

    self.add_cpds(FunctionCPD(d, opt_policy, parents, state_names=state_names, label="opt"))
def pure_decision_rules(self, decision: str) -> List[FunctionCPD]:
    """Return a list of the decision rules available at the given decision.

    A pure decision rule assigns one action (state name) to every decision
    context (joint instantiation of the decision's parents), so there are
    |domain| ** (number of contexts) rules in total.
    """
    cpd: TabularCPD = self.get_cpds(decision)
    evidence_card = cpd.cardinality[1:]
    parents = cpd.variables[1:]
    state_names = cpd.state_names[decision]

    # We begin by representing each possible decision rule as a tuple of
    # actions, with length equal to the number of decision contexts.
    number_of_contexts = np.product(evidence_card)
    functions_as_lists = list(itertools.product(state_names, repeat=number_of_contexts))

    def arg2idx(parent_values: tuple) -> int:
        """Convert a decision context into an index for the function list"""
        idx = 0
        for i, pv in enumerate(parent_values):
            name_to_no: Dict[Any, int] = self.get_cpds(
                parents[i]).name_to_no[parents[i]]
            idx += name_to_no[pv] * np.product(evidence_card[:i])
        # The index addresses one entry of a rule tuple, so it must lie within
        # the number of decision contexts. (The previous assert compared
        # against len(functions_as_lists) -- the number of *rules* -- with
        # `<=`, so it never guarded the actual lookup.)
        assert 0 <= idx < number_of_contexts
        return idx

    function_cpds: List[FunctionCPD] = []
    for func_list in functions_as_lists:
        # Bind func_list as a default argument to avoid late-binding of the
        # loop variable in the closure.
        def function(*parent_values: tuple, early_eval_func_list: tuple = func_list) -> Any:
            return early_eval_func_list[arg2idx(parent_values)]
        function_cpds.append(
            FunctionCPD(decision, function, cpd.variables[1:], state_names=cpd.state_names))
    return function_cpds
def test_initialize_function_cpd(self) -> None:
    """Check that FunctionCPDs with and without evidence initialise to the
    expected tabular form."""
    cid = get_minimal_cid()
    cpd_a = FunctionCPD('A', lambda: 2, evidence=[])
    cpd_a.initialize_tabular_cpd(cid)
    # assertTrue(x, y) treats y as a failure *message*, so the original call
    # never compared the arrays; use an explicit comparison instead.
    self.assertTrue(np.array_equal(cpd_a.get_values(), np.array([[1]])))
    self.assertEqual(cpd_a.get_cardinality(['A'])['A'], 1)
    self.assertEqual(cpd_a.get_state_names('A', 0), 2)

    cpd_b = FunctionCPD('B', lambda x: x, evidence=['A'])
    cpd_b.initialize_tabular_cpd(cid)
    # The original re-checked cpd_a here (copy-paste slip); check cpd_b:
    # B copies A, and A's domain in the minimal CID is [0, 1], so B's CPD
    # should be the 2x2 identity -- NOTE(review): confirm against FunctionCPD.
    self.assertTrue(np.array_equal(cpd_b.get_values(), np.array([[1, 0], [0, 1]])))
    self.assertEqual(cpd_b.get_cardinality(['B'])['B'], 2)
    self.assertEqual(cpd_b.get_state_names('B', 0), 0)
def get_minimal_cid() -> CID:
    """The smallest possible CID: one binary decision A feeding one utility
    node B that simply copies it."""
    cid = CID([('A', 'B')], decision_nodes=['A'], utility_nodes=['B'])
    cid.add_cpds(
        DecisionDomain('A', [0, 1]),
        FunctionCPD('B', lambda a: a, evidence=['A']),
    )
    return cid