Ejemplos de Strategy en Python, ejemplos de rlpoker.extensive_game.Strategy en Python

Ejemplo n.º 1

0

Mostrar archivo

    def test_expected_value_exact(self):
        """Check expected_value_exact against a hand-computed payoff.

        Player 1 mixes (R, P, S) = (0.5, 0.2, 0.3) at the information set of
        the root node; player 2 mixes (R, P, S) = (0.2, 0.3, 0.5) at the
        information set of the node reached by ('R', ). The value returned
        for player 2 is expected to be the negation of player 1's value.
        """
        game, _, _ = create_neural_rock_paper_scissors()

        strategy1 = extensive_game.Strategy({
            game.info_set_ids[game.get_node(())]:
            extensive_game.ActionFloat({
                'R': 0.5,
                'P': 0.2,
                'S': 0.3
            })
        })

        strategy2 = extensive_game.Strategy({
            game.info_set_ids[game.get_node(('R', ))]:
            extensive_game.ActionFloat({
                'R': 0.2,
                'P': 0.3,
                'S': 0.5
            })
        })

        computed1, computed2 = game.expected_value_exact(strategy1=strategy1,
                                                         strategy2=strategy2)

        # One outer term per player 1 action (R, P, S); each inner sum runs
        # over player 2's (R, P, S) with player 1's payoff as the factor.
        expected1 = 0.5 * (0.2 * 0 + 0.3 * -1 + 0.5 * 1) + \
                    0.2 * (0.2 * 1 + 0.3 * 0 + 0.5 * -1) + \
                    0.3 * (0.2 * -1 + 0.3 * 1 + 0.5 * 0)

        # Both sides are sums of float products; the implementation is free to
        # accumulate them in a different order, so compare with a tolerance
        # instead of bit-for-bit equality.
        self.assertAlmostEqual(computed1, expected1)
        self.assertAlmostEqual(computed2, -expected1)

Ejemplo n.º 2

0

Mostrar archivo

    def test_is_strategy_complete(self):
        """Exercise is_strategy_complete on two incomplete and one complete strategy."""
        game, _, _ = create_neural_rock_paper_scissors()

        # Case 1: only the () information set is given, so one is missing.
        missing_info_set = extensive_game.Strategy({
            (): extensive_game.ActionFloat({'R': 0.4, 'P': 0.5, 'S': 0.1}),
        })
        self.assertEqual(game.is_strategy_complete(missing_info_set), False)

        # Case 2: both information sets are given, but (-1, ) has no 'S' entry.
        missing_action = extensive_game.Strategy({
            (): extensive_game.ActionFloat({'R': 0.4, 'P': 0.5, 'S': 0.1}),
            (-1, ): extensive_game.ActionFloat({'R': 0.4, 'P': 0.5}),
        })
        self.assertEqual(game.is_strategy_complete(missing_action), False)

        # Case 3: every information set lists all three actions.
        complete = extensive_game.Strategy({
            (): extensive_game.ActionFloat({'R': 0.4, 'P': 0.5, 'S': 0.1}),
            (-1, ): extensive_game.ActionFloat({'R': 0.4, 'P': 0.3, 'S': 0.3}),
        })
        self.assertEqual(game.is_strategy_complete(complete), True)

Ejemplo n.º 3

0

Mostrar archivo

    def test_copy_strategy(self):
        """A copied Strategy matches its source and keeps its own entries
        when the source is reassigned afterwards."""
        original = extensive_game.Strategy({
            'info1': extensive_game.ActionFloat({'a1': 0.4, 'a2': 0.6}),
            'info2': extensive_game.ActionFloat({'a2': 0.3, 'a4': 0.7}),
        })

        duplicate = original.copy()

        # Right after copying, both information sets compare equal.
        for info_set in ('info1', 'info2'):
            self.assertEqual(original[info_set], duplicate[info_set])

        # Reassign one entry on the source only.
        original['info1'] = extensive_game.ActionFloat({'a1': 0.2, 'a2': 0.8})

        # The entry that was not reassigned still agrees ...
        self.assertEqual(original['info2'], duplicate['info2'])

        # ... and the copy still holds the pre-reassignment distribution.
        self.assertEqual(
            duplicate['info1'],
            extensive_game.ActionFloat({'a1': 0.4, 'a2': 0.6}))

Ejemplo n.º 4

0

Mostrar archivo

    def test_equals(self):
        """Strategies with identical entries compare equal; a differing
        'info1' distribution makes them unequal."""

        def build(info1_a1, info1_a2):
            # 'info2' is the same in every strategy; only 'info1' varies.
            return extensive_game.Strategy({
                'info1':
                extensive_game.ActionFloat({
                    'a1': info1_a1,
                    'a2': info1_a2
                }),
                'info2':
                extensive_game.ActionFloat({
                    'a2': 0.3,
                    'a4': 0.7
                })
            })

        reference = build(0.4, 0.6)
        identical = build(0.4, 0.6)
        different = build(0.3, 0.7)

        self.assertEqual(reference, identical)
        self.assertNotEqual(reference, different)

Ejemplo n.º 5

0

Mostrar archivo

Archivo: test_cfr_metrics.py Proyecto: Michael-Z/rlpoker

    def test_rock_paper_scissors(self):
        """Check compute_expected_utility on rock-paper-scissors.

        Verifies player 1's expected utility at the root and player 2's
        expected utility at each of the nodes ('R', ), ('P', ) and ('S', )
        against hand-computed values.
        """
        game = rock_paper_scissors.create_rock_paper_scissors()

        info_set_1 = game.info_set_ids[game.root]
        info_set_2 = game.info_set_ids[game.root.children['R']]

        # Player 1 plays (R, P, S) with probabilities (0.5, 0.3, 0.2), respectively.
        sigma_1 = extensive_game.Strategy({
            info_set_1:
            extensive_game.ActionFloat({
                'R': 0.5,
                'P': 0.3,
                'S': 0.2
            })
        })

        # Player 2 plays (R, P, S) with probabilities (0.3, 0.3, 0.4), respectively.
        sigma_2 = extensive_game.Strategy({
            info_set_2:
            extensive_game.ActionFloat({
                'R': 0.3,
                'P': 0.3,
                'S': 0.4
            }),
        })

        # Check the values.
        expected_utility_1, expected_utility_2 = cfr_metrics.compute_expected_utility(
            game, sigma_1, sigma_2)

        # The expected utilities are sums of float products, so compare with a
        # tolerance rather than relying on an identical summation order.
        utility_root = (
            0.5 * (0 * 0.3 + -1 * 0.3 + 1 * 0.4) +  # RR, RP, RS
            0.3 * (1 * 0.3 + 0 * 0.3 + -1 * 0.4) +  # PR, PP, PS
            0.2 * (-1 * 0.3 + 1 * 0.3 + 0 * 0.4))  # SR, SP, SS
        self.assertAlmostEqual(expected_utility_1[game.get_node(())],
                               utility_root)

        # Player 2's utility at each node, indexed by player 1's action.
        utility_R = 0 * 0.3 + 1 * 0.3 + -1 * 0.4  # RR, RP, RS
        self.assertAlmostEqual(expected_utility_2[game.get_node(('R', ))],
                               utility_R)
        utility_P = -1 * 0.3 + 0 * 0.3 + 1 * 0.4  # PR, PP, PS
        self.assertAlmostEqual(expected_utility_2[game.get_node(('P', ))],
                               utility_P)
        utility_S = 1 * 0.3 + -1 * 0.3 + 0 * 0.4  # SR, SP, SS
        self.assertAlmostEqual(expected_utility_2[game.get_node(('S', ))],
                               utility_S)

Ejemplo n.º 6

0

Mostrar archivo

    def compute_strategy(self):
        """
        Build a Strategy by normalising every entry of ``self.alpha``.

        Each information set key in ``self.alpha`` is mapped to the result of
        ``extensive_game.ActionFloat.normalise`` applied to its value.

        Returns:
            strategy: an ``extensive_game.Strategy`` with one entry per key of
                ``self.alpha``.
        """
        normalised = {
            info_set: extensive_game.ActionFloat.normalise(self.alpha[info_set])
            for info_set in self.alpha
        }
        return extensive_game.Strategy(normalised)

Ejemplo n.º 7

0

Mostrar archivo

    def test_compute_weighted_strategy(self):
        """compute_weighted_strategy should return, per information set, the
        weight-averaged action probabilities of the supplied distributions."""
        action_float = extensive_game.ActionFloat

        # Each information set maps to a list of (weight, distribution) pairs.
        weighted = {
            'info1': [
                (1.0, action_float({'a1': 0.4, 'a2': 0.6})),
                (2.0, action_float({'a1': 0.5, 'a2': 0.5})),
            ],
            'info2': [
                (3.0, action_float({'a1': 0.6, 'a2': 0.4})),
                (2.0, action_float({'a1': 0.3, 'a2': 0.7})),
                (1.0, action_float({'a1': 0.0, 'a2': 1.0})),
            ],
        }

        # Total weight per information set, used as the normaliser.
        total_1 = 1.0 + 2.0
        total_2 = 3.0 + 2.0 + 1.0

        expected = extensive_game.Strategy({
            'info1': action_float({
                'a1': (1.0 * 0.4 + 2.0 * 0.5) / total_1,
                'a2': (1.0 * 0.6 + 2.0 * 0.5) / total_1,
            }),
            'info2': action_float({
                'a1': (3.0 * 0.6 + 2.0 * 0.3 + 1.0 * 0.0) / total_2,
                'a2': (3.0 * 0.4 + 2.0 * 0.7 + 1.0 * 1.0) / total_2,
            }),
        })

        result = extensive_game.compute_weighted_strategy(weighted)

        self.assertEqual(result, expected)

Ejemplo n.º 8

0

Mostrar archivo

Archivo: test_cfr_metrics.py Proyecto: Michael-Z/rlpoker

    def test_rock_paper_scissors_recursive(self):
        """Check compute_expected_utility_recursive on rock-paper-scissors.

        Covers three levels of the tree: every terminal node, the player 2
        node reached by ('R', ), and the root node.
        """
        game = rock_paper_scissors.create_rock_paper_scissors()

        info_set_1 = game.info_set_ids[game.root]
        info_set_2 = game.info_set_ids[game.root.children['R']]

        # Player 1 plays (R, P, S) with probabilities (0.5, 0.3, 0.2), respectively.
        sigma_1 = extensive_game.Strategy({
            info_set_1:
            extensive_game.ActionFloat({
                'R': 0.5,
                'P': 0.3,
                'S': 0.2
            })
        })

        # Player 2 plays (R, P, S) with probabilities (0.3, 0.3, 0.4), respectively.
        sigma_2 = extensive_game.Strategy({
            info_set_2:
            extensive_game.ActionFloat({
                'R': 0.3,
                'P': 0.3,
                'S': 0.4
            }),
        })

        # Check that terminal nodes have value equal to their utility to the player.
        terminal_nodes = [
            game.get_node((a1, a2)) for a1 in ['R', 'P', 'S']
            for a2 in ['R', 'P', 'S']
        ]

        for node in terminal_nodes:
            v1, v2 = cfr_metrics.compute_expected_utility_recursive(
                game, node, sigma_1, sigma_2, collections.defaultdict(float),
                collections.defaultdict(float), 1.0, 1.0, 1.0)
            expected_v1 = node.utility[1]
            expected_v2 = node.utility[2]

            self.assertEqual(v1, expected_v1)
            self.assertEqual(v2, expected_v2)

        # Check the values of the player 2 nodes.
        # From here on the values are sums of float products, so they are
        # compared with a tolerance instead of exact equality.
        v1, v2 = cfr_metrics.compute_expected_utility_recursive(
            game, game.get_node(('R', )), sigma_1, sigma_2,
            collections.defaultdict(float), collections.defaultdict(float),
            0.5, 1.0, 1.0)
        self.assertAlmostEqual(v1, 0 * 0.3 + -1 * 0.3 + 1 * 0.4)
        self.assertAlmostEqual(v2, 0 * 0.3 + 1 * 0.3 + -1 * 0.4)

        # Check the values of the (only) player 1 node.
        # NOTE(review): the trailing arguments (0.5, 1.0, 1.0) are the same as
        # for the ('R', ) node above -- confirm that 0.5 is intended for the
        # root as well.
        v1, v2 = cfr_metrics.compute_expected_utility_recursive(
            game, game.get_node(()), sigma_1, sigma_2,
            collections.defaultdict(float), collections.defaultdict(float),
            0.5, 1.0, 1.0)
        self.assertAlmostEqual(
            v1,
            (
                0.5 * (0 * 0.3 + -1 * 0.3 + 1 * 0.4) +  # RR, RP, RS
                0.3 * (1 * 0.3 + 0 * 0.3 + -1 * 0.4) +  # PR, PP, PS
                0.2 * (-1 * 0.3 + 1 * 0.3 + 0 * 0.4)))  # SR, SP, SS
        self.assertAlmostEqual(
            v2,
            (
                0.5 * (0 * 0.3 + 1 * 0.3 + -1 * 0.4) +  # RR, RP, RS
                0.3 * (-1 * 0.3 + 0 * 0.3 + 1 * 0.4) +  # PR, PP, PS
                0.2 * (1 * 0.3 + -1 * 0.3 + 0 * 0.4)))  # SR, SP, SS