Beispiel #1
0
class MockNnTerminal():
    ''' Stand-in for a value network: instead of evaluating a neural net it
        multiplies the opponent's bucket range by a precomputed
        bucket-vs-bucket equity matrix.
    '''

    def __init__(self):
        ''' Creates an equity matrix with entries for every possible pair of buckets.

        For every second-round board the call matrix of that board is copied
        into `self.equity_matrix` at the positions given by the buckets of
        the two private cards.
        '''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        bC, CC = self.bucket_count, game_settings.card_count
        self.equity_matrix = np.zeros([bC, bC], dtype=arguments.dtype)
        # filling equity matrix, one board at a time
        boards = card_tools.get_second_round_boards()
        self.board_count = boards.shape[0]
        self.terminal_equity = TerminalEquity()
        for i in range(self.board_count):
            board = boards[i]
            self.terminal_equity.set_board(board)
            call_matrix = self.terminal_equity.get_call_matrix()
            buckets = self.bucketer.compute_buckets(board)
            self._fill_equity_entries(self.equity_matrix, call_matrix,
                                      buckets, CC)

    @staticmethod
    def _fill_equity_entries(equity_matrix, call_matrix, buckets, card_count):
        ''' Copies one board's call matrix into the bucket equity matrix.
        @param: equity_matrix (bC,bC) array updated in place
        @param: call_matrix per-card payoff matrix, indexed [c1][c2]
        @param: buckets bucket id of every card on this board; negative ids
                are skipped (NOTE(review): the bucketer appears to use -1
                for hands that are impossible on the board - confirm)
        @param: card_count number of cards to iterate over
        '''
        for c1 in range(card_count):
            b1 = buckets[c1]
            # Bucket ids are 0-based, so only negative ids are invalid;
            # testing `> 0` would silently drop bucket 0.
            if b1 < 0:
                continue
            for c2 in range(card_count):
                b2 = buckets[c2]
                if b2 < 0:
                    continue
                equity_matrix[b1, b2] = call_matrix[c1][c2]

    def get_value(self, inputs, outputs):
        ''' Gives the expected showdown equity of the two players' ranges.
        @param: inputs An (N,I) tensor containing N instances of neural net inputs.
                Only the first 2*bucket_count columns (the two players'
                bucket ranges) are read; any further columns are ignored.
        @param: outputs An (N,O) tensor in which to store N sets of expected showdown
                counterfactual values for each player. The first
                2*bucket_count columns are overwritten in place.
        '''
        bC = self.bucket_count
        assert (outputs.ndim == 2)
        # column span of each player's block inside inputs / outputs
        player_indexes = [(0, bC), (bC, 2 * bC)]
        players_count = 2
        for player in range(players_count):
            p_start, p_end = player_indexes[player]  # player idx
            o_start, o_end = player_indexes[1 - player]  # opponent idx
            # a player's values come from the opponent's range
            outputs[:, p_start:p_end] = np.dot(inputs[:, o_start:o_end],
                                               self.equity_matrix)
Beispiel #2
0
class NextRoundValue():
    ''' Evaluates states at the end of a betting round by converting card
        ranges to bucket ranges on every next-round board, querying a value
        network, and converting the bucket values back to card values.
    '''

    def __init__(self, value_nn):
        ''' Creates a tensor that can translate hand ranges to bucket ranges
            on any board.
        @param: value_nn object whose get_value(inputs, outputs) fills
                `outputs` in place (Nn.ValueNn according to the original docs)
        '''
        self._values_are_prepared = False
        self.nn = value_nn
        self._init_bucketing()

    def _init_bucketing(self):
        ''' Initializes the tensor that translates hand ranges to bucket ranges.
        '''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        boards = card_tools.get_second_round_boards()
        self.board_count = boards.shape[0]
        CC, BC, bC = game_settings.card_count, self.board_count, self.bucket_count
        self._range_matrix = np.zeros([CC, BC * bC], dtype=arguments.dtype)
        # (CC, BC, bC) view sharing memory with _range_matrix, so writes
        # through the view fill the flat matrix as well
        self._range_matrix_board_view = self._range_matrix.reshape(
            [CC, BC, bC])
        class_ids = np.arange(bC).reshape([1, bC])
        for idx in range(BC):
            board = boards[idx]
            buckets = self.bucketer.compute_buckets(board)
            # finding all strength classes: (CC, bC) mask that is True where
            # a card's bucket equals the column's bucket id
            card_in_bucket = class_ids == buckets.reshape([CC, 1])
            # matrix for transformation from card ranges to strength class ranges
            self._range_matrix_board_view[:, idx, :][card_in_bucket] = 1
        # matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.T.copy()
        # we need to div the matrix by the sum of possible boards
        # (from point of view of each hand)
        weight_constant = 1.0 / (BC - 2)  # count
        self._reverse_value_matrix *= weight_constant

    def _card_range_to_bucket_range(self, card_range, bucket_range):
        ''' Converts a range vector over private hands to a range vector over buckets.
        @param: card_range a probability vector over private hands
        @param: bucket_range a vector in which to store the output probabilities
                over buckets (written in place)
        '''
        bucket_range[:, :] = np.dot(card_range, self._range_matrix)

    def _bucket_value_to_card_value(self, bucket_value, card_value):
        ''' Converts a value vector over buckets to a value vector over private hands.
        @param: bucket_value a value vector over buckets
        @param: card_value a vector in which to store the output values over
                private hands (written in place)
        '''
        card_value[:, :] = np.dot(bucket_value, self._reverse_value_matrix)

    def _bucket_value_to_card_value_on_board(self, board, bucket_value,
                                             card_value):
        ''' Converts a value vector over buckets to a value vector over
            private hands given a particular set of board cards.
        @param: board a non-empty vector of board cards
        @param: bucket_value a value tensor over buckets; its third axis is
                indexed by board
        @param: card_value a vector in which to store the output values over
                private hands. Written through a reshape, so it must be laid
                out such that the reshape is a view.
        '''
        CC, bC = game_settings.card_count, self.bucket_count
        board_idx = card_tools.get_board_index(board)
        # (bC, CC) slice of the range matrix for this board only
        board_matrix = self._range_matrix_board_view[:, board_idx, :].T
        serialized_card_value = card_value.reshape([-1, CC])
        serialized_bucket_value = bucket_value[:, :, board_idx, :].copy(
        ).reshape([-1, bC])
        serialized_card_value[:, :] = np.dot(serialized_bucket_value,
                                             board_matrix)

    def start_computation(self, pot_sizes):
        ''' Initializes the value calculator with the pot size of each state that
            we are going to evaluate.
            During continual re-solving, there is one pot size for each
            initial state of the second betting round (before board cards are dealt).
            Also clears the "averages already prepared" flag, because the
            value memory is rebuilt from zero by the following
            @{get_value} calls and has to be normalized again.
        @param pot_sizes a vector of pot sizes
        '''
        self.iter = 0
        self._values_are_prepared = False
        self.pot_sizes = pot_sizes.reshape([-1, 1]).copy()
        self.batch_size = pot_sizes.shape[0]

    def get_value(self, ranges, values):
        ''' Gives the predicted counterfactual values at each evaluated state,
            given input ranges.
        @{start_computation} must be called first. Each state to be evaluated must
                be given in the same order that pot sizes were given for that function.
                Keeps track of iterations internally, so should be called exactly
                once for every iteration of continual re-solving.
        @param: ranges An (N,2,K) tensor, where N is the number of states evaluated
                (must match input to @{start_computation}), 2 is the number of players,
                and K is the number of private hands. Contains N sets of 2 range vectors.
        @param: values an (N,2,K) tensor in which to store the N sets of 2 value vectors
                which are output. Written through a reshape, so it must be
                contiguous for the result to reach the caller.
        '''
        PC, BC = constants.players_count, self.board_count
        BS, bC = self.batch_size, self.bucket_count
        assert (ranges is not None and values is not None)
        assert (ranges.shape[0] == self.batch_size)
        self.iter += 1
        if self.iter == 1:
            # initializing data structures
            self.next_round_inputs = np.zeros([BS, BC, bC * PC + 1],
                                              dtype=arguments.dtype)
            self.next_round_values = np.zeros([BS, BC, PC, bC],
                                              dtype=arguments.dtype)
            self.transposed_next_round_values = np.zeros([BS, PC, BC, bC],
                                                         dtype=arguments.dtype)
            self.next_round_extended_range = np.zeros([BS, PC, BC * bC],
                                                      dtype=arguments.dtype)
            # one row per (state, player, board); shares memory with
            # next_round_extended_range
            self.next_round_serialized_range = self.next_round_extended_range.reshape(
                [-1, bC])
            self.range_normalization = np.zeros([])
            self.value_normalization = np.zeros([BS, PC, BC],
                                                dtype=arguments.dtype)
            # handling pot feature for the nn: the (BS,1) column broadcasts
            # over all boards into the last input feature
            nn_bet_input = self.pot_sizes * (1 / arguments.stack)
            self.next_round_inputs[:, :, -1] = nn_bet_input.reshape([-1, 1])
        # we need to find if we need remember something in this iteration
        use_memory = self.iter > arguments.cfr_skip_iters
        if use_memory and self.iter == arguments.cfr_skip_iters + 1:
            # first iter that we need to remember something - we need to init data structures
            self.range_normalization_memory = np.zeros([BS * BC * PC, 1],
                                                       dtype=arguments.dtype)
            self.counterfactual_value_memory = np.zeros([BS, PC, BC, bC],
                                                        dtype=arguments.dtype)
        # computing bucket range in next street for both players at once
        self._card_range_to_bucket_range(
            ranges.reshape([BS * PC, -1]),
            self.next_round_extended_range.reshape([BS * PC, -1]))
        self.range_normalization = np.sum(self.next_round_serialized_range,
                                          axis=1,
                                          keepdims=True)
        rn_view = self.range_normalization.reshape([BS, PC, BC])
        # a player's values are scaled by the opponent's range sum; this
        # must be captured before zeros are replaced by ones below
        for player in range(PC):
            self.value_normalization[:, player, :] = rn_view[:, 1 - player, :]
        if use_memory:
            self.range_normalization_memory += self.value_normalization.reshape(
                self.range_normalization_memory.shape)
        # eliminating division by zero
        self.range_normalization[self.range_normalization == 0] = 1
        # in-place division also normalizes next_round_extended_range
        self.next_round_serialized_range /= self.range_normalization
        for player in range(PC):
            self.next_round_inputs[:, :, player * bC:(player + 1) * bC] = (
                self.next_round_extended_range[:, player, :].reshape(
                    [BS, BC, bC]))
        # using nn to compute values
        serialized_inputs_view = self.next_round_inputs.reshape([BS * BC, -1])
        serialized_values_view = self.next_round_values.reshape([BS * BC, -1])
        # computing value in the next round
        self.nn.get_value(serialized_inputs_view, serialized_values_view)
        # normalizing values back according to the original range sum
        normalization_view = np.transpose(
            self.value_normalization.reshape([BS, PC, BC, 1]),
            [0, 2, 1, 3])  # (BS, BC, PC, 1), broadcasts over buckets
        self.next_round_values *= normalization_view
        self.transposed_next_round_values = np.transpose(
            self.next_round_values, [0, 2, 1, 3]).copy()  # (BS, PC, BC, bC)
        # remembering the values for the next round
        if use_memory:
            self.counterfactual_value_memory += self.transposed_next_round_values
        # translating bucket values back to the card values
        self._bucket_value_to_card_value(
            self.transposed_next_round_values.reshape([BS * PC, -1]),
            values.reshape([BS * PC, -1]))

    def get_value_on_board(self, board, values):
        ''' Gives the average counterfactual values on the given board
            across previous calls to @{get_value}.
            Used to update opponent counterfactual values during re-solving
            after board cards are dealt.
        @param: board a non-empty vector of board cards
        @param: values a tensor in which to store the values; its first
                dimension must equal the batch size
        '''
        # check if we have evaluated correct number of iterations
        assert (self.iter == arguments.cfr_iters)
        assert (values.shape[0] == self.batch_size)
        self._prepare_next_round_values()
        self._bucket_value_to_card_value_on_board(
            board, self.counterfactual_value_memory, values)

    def _prepare_next_round_values(self):
        ''' Normalizes the counterfactual values remembered between @{get_value}
            calls so that they are an average rather than a sum.
            Runs at most once per computation; later calls return immediately.
        '''
        bC = self.bucket_count
        assert (self.iter == arguments.cfr_iters)
        # do nothing if already prepared
        if self._values_are_prepared:
            return
        # eliminating division by zero
        self.range_normalization_memory[self.range_normalization_memory ==
                                        0] = 1
        # view on the memory, so the division updates it in place
        serialized_memory_view = self.counterfactual_value_memory.reshape(
            [-1, bC])
        serialized_memory_view /= self.range_normalization_memory
        self._values_are_prepared = True
Beispiel #3
0
class BucketConversion():
    ''' Translates vectors over private hands into vectors over buckets, and
        back, for a single board chosen with @{set_board}.
    '''

    def __init__(self):
        ''' Starts with no board selected; every attribute is filled in by
            @{set_board}.
        '''
        self._reverse_value_matrix = None
        self._range_matrix = None
        self.bucket_count = None
        self.bucketer = None

    def set_board(self, board):
        ''' Sets the board cards for the bucketer and builds the conversion
            matrices for that board.
        @param: board a non-empty vector of board cards

        `self._range_matrix` is a (card_count, bucket_count) 0/1 matrix with
        a 1 at [card, bucket] when the bucketer assigns that card to that
        bucket. Example carried over from the original notes: with
        buckets = [-1 13 14 -1 16 17] rows 1, 2, 4, 5 get a single 1 in
        columns 13, 14, 16, 17 and rows 0 and 3 stay all zero, because -1
        matches no column.
        '''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        card_count = game_settings.card_count
        n_buckets = self.bucket_count
        self._range_matrix = np.zeros([card_count, n_buckets],
                                      dtype=arguments.int_dtype)
        # row vector of all bucket ids, compared against a column vector of
        # each card's bucket; broadcasting yields the (cards, buckets) mask
        bucket_ids = np.arange(n_buckets, dtype=arguments.int_dtype).reshape(
            [1, n_buckets])
        buckets = self.bucketer.compute_buckets(board)
        card_in_bucket = bucket_ids == buckets.reshape([card_count, 1])
        # card range -> bucket range
        self._range_matrix[card_in_bucket] = 1
        # bucket value -> card value
        self._reverse_value_matrix = self._range_matrix.T.copy()

    def card_range_to_bucket_range(self, card_range, bucket_range):
        ''' Converts a range vector over private hands to a range vector over buckets.
        @{set_board} must be called first. Used to create inputs to the neural net.
        @param: card_range (1, CC) a probability vector over private hands
        @param: bucket_range (1,bC) a vector in which to save the resulting probability
                vector over buckets (written in place)
        '''
        converted = np.dot(card_range, self._range_matrix)
        bucket_range[:, :] = converted

    def bucket_value_to_card_value(self, bucket_value, card_value):
        ''' Converts a value vector over buckets to a value vector over private hands.
        @{set_board} must be called first. Used to process neural net outputs.
        @param: bucket_value a vector of values over buckets
        @param: card_value a vector in which to save the resulting vector of values
                over private hands (written in place)
        '''
        converted = np.dot(bucket_value, self._reverse_value_matrix)
        card_value[:, :] = converted

    def get_possible_bucket_mask(self):
        ''' Gives a vector of possible buckets on the board.
        @{set_board} must be called first.
        @return a (1, bucket_count) vector holding, for each bucket, the
                number of cards mapped to it (the column sums of the range
                matrix); 0 means the bucket is not possible on this board
        '''
        return np.sum(self._range_matrix, axis=0, keepdims=True)