def test_action_indexing(self):
    action_indexer = neural_game.ActionIndexer(['a', 'b', 'c', 'd'])

    self.assertEqual(action_indexer.get_index('a'), 0)
    self.assertEqual(action_indexer.get_index('d'), 3)
    self.assertEqual(action_indexer.get_action(2), 'c')
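# For reference, a minimal ActionIndexer consistent with the behaviour exercised
# above (get_index, get_action, and the action_dim property tested below) could
# look like the following. This is an assumed sketch, not necessarily the actual
# neural_game.ActionIndexer implementation.
class ActionIndexerSketch:
    """Maps actions to dense indices and back, for use as network output slots."""

    def __init__(self, actions):
        self.actions = list(actions)
        self._index = {action: i for i, action in enumerate(self.actions)}

    def get_index(self, action):
        return self._index[action]

    def get_action(self, index):
        return self.actions[index]

    @property
    def action_dim(self):
        return len(self.actions)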
def create_neural_leduc(cards, max_raises=4, raise_amount=2):
    game = Leduc(cards, max_raises=max_raises, raise_amount=raise_amount)
    action_indexer = neural_game.ActionIndexer([0, 1, 2])

    # Now compute the state vectors. We first define a one-hot encoding based on
    # the cards, mapping each card to its index.
    card_indices = {card: i for i, card in enumerate(cards)}
    state_vectors = compute_state_vectors(game.info_set_ids.values(), card_indices, max_raises)
    info_set_vectoriser = neural_game.InfoSetVectoriser(state_vectors)

    return neural_game.NeuralGame(game, action_indexer, info_set_vectoriser)
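# compute_state_vectors is assumed to map each Leduc information set id to a
# fixed-length numpy vector. A minimal sketch of the card part of that encoding
# might look like the hypothetical helper below; the real encoding presumably
# also covers the public card and the betting history, which this omits.
import numpy as np

def one_hot_card(card, card_indices):
    """Returns a one-hot vector with a 1 in the position of the given card."""
    vector = np.zeros(len(card_indices), dtype=np.float32)
    vector[card_indices[card]] = 1.0
    return vector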
def test_initialise(self):
    action_indexer = neural_game.ActionIndexer(['a', 'b'])
    network = deep_cfr.DeepRegretNetwork(state_shape=(5,), action_indexer=action_indexer, player=1)
    with tf.Session() as sess:
        network.set_sess(sess)
        network.initialise()

        # Check that we can predict advantages without raising.
        network.predict_advantages(np.array([1.0, 2.0, 3.0, 4.0, 5.0]), action_indexer)
def test_predict_advantages(self):
    info_set_vector = np.array([1, 2, 3, 4, 5]).astype(np.float32)
    action_indexer = neural_game.ActionIndexer(['a', 'b'])
    network = deep_cfr.DeepRegretNetwork(state_shape=(5,), action_indexer=action_indexer, player=1)
    with tf.Session() as sess:
        network.set_sess(sess)
        network.initialise()

        computed = network.predict_advantages(info_set_vector, action_indexer)

        self.assertEqual(set(computed.keys()), {'a', 'b'})
        self.assertIsInstance(computed['a'], np.float32)
        self.assertIsInstance(computed['b'], np.float32)
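# In deep CFR, predicted advantages are typically turned into a strategy via
# regret matching: play each action in proportion to its positive advantage, and
# fall back to uniform when no advantage is positive. A minimal sketch (a
# hypothetical helper, not necessarily the one this codebase uses):
def regret_matching(advantages):
    """Maps an {action: advantage} dict to an {action: probability} strategy."""
    positive = {a: max(v, 0.0) for a, v in advantages.items()}
    total = sum(positive.values())
    if total > 0:
        return {a: v / total for a, v in positive.items()}
    # No positive advantage: play uniformly at random.
    return {a: 1.0 / len(advantages) for a in advantages}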
def create_neural_rock_paper_scissors() -> neural_game.NeuralGame:
    """Returns a rock paper scissors game, ActionIndexer and InfoSetVectoriser.

    Returns:
        NeuralGame
    """
    game = create_rock_paper_scissors()
    action_indexer = neural_game.ActionIndexer(['R', 'P', 'S'])

    # There are only two information sets in rock paper scissors: one per player, since
    # neither player knows anything about the other player's move when choosing their own.
    # All of player 2's nodes share a single info set, so any child of the root gives its id.
    vectors = {
        game.get_info_set_id(game.root): np.array([1, 0]),
        game.get_info_set_id(game.root.children['R']): np.array([0, 1]),
    }
    info_set_vectoriser = neural_game.InfoSetVectoriser(vectors)

    return neural_game.NeuralGame(game, action_indexer, info_set_vectoriser)
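# A short usage sketch. This assumes NeuralGame behaves like a named tuple that
# can be unpacked into its three components (an assumption; only the constructor
# call above is confirmed here). The ActionIndexer calls match the tests above.
game, action_indexer, info_set_vectoriser = create_neural_rock_paper_scissors()
assert action_indexer.action_dim == 3
assert action_indexer.get_action(action_indexer.get_index('P')) == 'P'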
def test_get_dim(self):
    action_indexer = neural_game.ActionIndexer(['a', 'b', 'c'])
    self.assertEqual(action_indexer.action_dim, 3)

    action_indexer = neural_game.ActionIndexer(['a', 'b', 'c', 'd', 'e'])
    self.assertEqual(action_indexer.action_dim, 5)