Example 1
    def test_action_indexing(self):
        action_indexer = neural_game.ActionIndexer(['a', 'b', 'c', 'd'])

        self.assertEqual(action_indexer.get_index('a'), 0)
        self.assertEqual(action_indexer.get_index('d'), 3)

        self.assertEqual(action_indexer.get_action(2), 'c')
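
The test above pins down ActionIndexer's contract: a bijection between actions and contiguous integer indices. A minimal sketch consistent with these assertions (and with the action_dim property exercised in Example 6) could look like the following; the actual implementation in neural_game may differ.

class ActionIndexer:
    """Maps each action to a unique integer index and back (illustrative sketch)."""

    def __init__(self, actions):
        self._actions = list(actions)
        self._indices = {action: i for i, action in enumerate(self._actions)}

    def get_index(self, action):
        # Index of the given action, e.g. get_index('a') == 0 above.
        return self._indices[action]

    def get_action(self, index):
        # Inverse lookup, e.g. get_action(2) == 'c' above.
        return self._actions[index]

    @property
    def action_dim(self):
        # Number of distinct actions; see Example 6.
        return len(self._actions)
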
Example 2
def create_neural_leduc(cards, max_raises=4, raise_amount=2):
    game = Leduc(cards, max_raises=max_raises, raise_amount=raise_amount)

    action_indexer = neural_game.ActionIndexer([0, 1, 2])

    # Now compute the state vectors. We first define a one-hot encoding based on the cards.
    card_indices = {card: i for i, card in enumerate(cards)}
    state_vectors = compute_state_vectors(game.info_set_ids.values(), card_indices, max_raises)

    info_set_vectoriser = neural_game.InfoSetVectoriser(state_vectors)

    return neural_game.NeuralGame(game, action_indexer, info_set_vectoriser)
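
For illustration, the per-card one-hot encoding that the comment refers to could be built as below. This is a hypothetical helper, not the repository's compute_state_vectors, which must also encode the betting history (hence its max_raises argument).

import numpy as np

def one_hot_card(card, card_indices):
    # card_indices maps each card to its position, as constructed above.
    vector = np.zeros(len(card_indices), dtype=np.float32)
    vector[card_indices[card]] = 1.0
    return vector
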
Example 3
    def test_initialise(self):

        action_indexer = neural_game.ActionIndexer(['a', 'b'])
        network = deep_cfr.DeepRegretNetwork(state_shape=(5, ),
                                             action_indexer=action_indexer,
                                             player=1)

        with tf.Session() as sess:
            network.set_sess(sess)
            network.initialise()

            # Check we can predict
            network.predict_advantages(np.array([1.0, 2.0, 3.0, 4.0, 5.0]),
                                       action_indexer)
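
Note that tf.Session is TensorFlow 1.x API, so these tests presumably target TensorFlow 1.x; under TensorFlow 2 the same pattern would need tf.compat.v1.Session together with tf.compat.v1.disable_eager_execution().
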
Example 4
    def test_predict_advantages(self):
        info_set_vector = np.array([1, 2, 3, 4, 5], dtype=np.float32)
        action_indexer = neural_game.ActionIndexer(['a', 'b'])

        network = deep_cfr.DeepRegretNetwork(state_shape=(5, ),
                                             action_indexer=action_indexer,
                                             player=1)

        with tf.Session() as sess:
            network.set_sess(sess)
            network.initialise()

            computed = network.predict_advantages(info_set_vector,
                                                  action_indexer)

            self.assertEqual(set(computed.keys()), {'a', 'b'})
            self.assertIsInstance(computed['a'], np.float32)
            self.assertIsInstance(computed['b'], np.float32)
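
predict_advantages evidently returns a dict mapping each action to a scalar advantage. In Deep CFR such advantages play the role of counterfactual regrets, so a strategy can be recovered from them by regret matching; a sketch of that step, making no assumptions about this repository's own API:

def strategy_from_advantages(advantages):
    # Regret matching: play each action with probability proportional to its positive advantage.
    positives = {action: max(value, 0.0) for action, value in advantages.items()}
    total = sum(positives.values())
    if total > 0.0:
        return {action: value / total for action, value in positives.items()}
    # If no advantage is positive, fall back to the uniform strategy.
    return {action: 1.0 / len(advantages) for action in advantages}
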
Example 5
def create_neural_rock_paper_scissors() -> typing.Tuple[extensive_game.ExtensiveGame,
                                                        neural_game.ActionIndexer,
                                                        neural_game.InfoSetVectoriser]:
    """Returns a rock paper scissors game, ActionIndexer and InfoStateVectoriser.

    Returns:
        NeuralGame
    """
    game = create_rock_paper_scissors()

    action_indexer = neural_game.ActionIndexer(['R', 'P', 'S'])

    # There are only two information sets in rock, paper, scissors: one per player, since neither player has any
    # information about the other player's move when choosing their own.
    vectors = {
        game.get_info_set_id(game.root): np.array([1, 0]),
        game.get_info_set_id(game.root.children['R']): np.array([0, 1]),
    }
    info_set_vectoriser = neural_game.InfoSetVectoriser(vectors)

    return neural_game.NeuralGame(game, action_indexer, info_set_vectoriser)
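
Since the annotated return type is a tuple, the result presumably unpacks into its three components. A hypothetical usage along those lines (the get_vector accessor is an assumed name; only the InfoSetVectoriser constructor appears in these examples):

# NeuralGame is assumed to unpack as (game, action_indexer, info_set_vectoriser).
game, action_indexer, info_set_vectoriser = create_neural_rock_paper_scissors()
assert action_indexer.get_index('R') == 0
# 'get_vector' is an assumed accessor; the examples above only show the constructor.
root_vector = info_set_vectoriser.get_vector(game.get_info_set_id(game.root))
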
Example 6
    def test_get_dim(self):
        action_indexer = neural_game.ActionIndexer(['a', 'b', 'c'])
        self.assertEqual(action_indexer.action_dim, 3)

        action_indexer = neural_game.ActionIndexer(['a', 'b', 'c', 'd', 'e'])
        self.assertEqual(action_indexer.action_dim, 5)
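
Presumably action_dim is what sizes the advantage output of DeepRegretNetwork in Examples 3 and 4: with ActionIndexer(['a', 'b']) the network emits one advantage per action, matching the {'a', 'b'} keys asserted in Example 4.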