Example #1
    def _init_bucketing(self):
        ''' Initializes the tensor that translates hand ranges to bucket ranges.
		'''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        boards = card_tools.get_second_round_boards()
        self.board_count = boards.shape[0]
        CC, BC, bC = game_settings.card_count, self.board_count, self.bucket_count
        self._range_matrix = np.zeros([CC, BC * bC], dtype=arguments.dtype)
        self._range_matrix_board_view = self._range_matrix.reshape(
            [CC, BC, bC])
        for idx in range(BC):
            board = boards[idx]
            buckets = self.bucketer.compute_buckets(board)
            class_ids = np.arange(bC)
            class_ids = class_ids.reshape([1, bC]) * np.ones(
                [CC, bC], dtype=class_ids.dtype)
            card_buckets = buckets.reshape([CC, 1]) * np.ones(
                [CC, bC], dtype=class_ids.dtype)
            # finding all strength classes
            # matrix for transformation from card ranges to strength class ranges
            self._range_matrix_board_view[:, idx, :][class_ids ==
                                                     card_buckets] = 1
        # matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.T.copy()
        # we need to divide the matrix by the number of possible boards
        # (from the point of view of each hand)
        weight_constant = 1 / (BC - 2)  # count
        self._reverse_value_matrix *= weight_constant
Example #2
	def generate_data_file(self, data_count, file_name):
		''' Generates data files containing examples of random poker situations
			with associated terminal equity.
			Each poker situation is randomly generated using @{range_generator} and
			@{random_card_generator}.
			For description of neural net input and target type, see @{net_builder}.
		@param: data_count the number of examples to generate
		@param: file_name the prefix of the files where the data is saved (appended
				with `.inputs`, `.targets`, and `.mask`).
		'''
		range_generator = RangeGenerator()
		batch_size = arguments.gen_batch_size
		assert data_count % batch_size == 0, 'data count has to be divisible by the batch size'
		batch_count = data_count // batch_size
		bucketer = Bucketer()
		bucket_count = bucketer.get_bucket_count()
		player_count = 2
		target_size = bucket_count * player_count
		targets = np.zeros([data_count, target_size], dtype=arguments.dtype)
		input_size = bucket_count * player_count + 1
		inputs = np.zeros([data_count, input_size], dtype=arguments.dtype)
		mask = np.zeros([data_count, bucket_count], dtype=arguments.dtype)
		bucket_conversion = BucketConversion()
		equity = TerminalEquity()
		for batch in range(batch_count):
			board = card_generator.generate_cards(game_settings.board_card_count)
			range_generator.set_board(board)
			bucket_conversion.set_board(board)
			equity.set_board(board)
			# generating ranges
			ranges = np.zeros([player_count, batch_size, game_settings.card_count], dtype=arguments.dtype)
			for player in range(player_count):
				range_generator.generate_range(ranges[player])
			# generating pot features
			pot_sizes = np.random.rand(batch_size)
			# translating ranges to features
			b_start, b_end = batch * batch_size, (batch + 1) * batch_size
			pot_feature_index = -1
			inputs[b_start:b_end, pot_feature_index] = pot_sizes.copy()
			player_indexes = [(0, bucket_count), (bucket_count, bucket_count * 2)]
			for player in range(player_count):
				p_start, p_end = player_indexes[player]
				bucket_conversion.card_range_to_bucket_range(ranges[player], inputs[b_start:b_end, p_start:p_end])
			# computation of values using terminal equity
			values = np.zeros([player_count, batch_size, game_settings.card_count], dtype=arguments.dtype)
			for player in range(player_count):
				opponent = 1 - player
				equity.call_value(ranges[opponent], values[player])
			# translating values to nn targets
			for player in range(player_count):
				p_start, p_end = player_indexes[player]
				bucket_conversion.card_range_to_bucket_range(values[player], targets[b_start:b_end, p_start:p_end])
			# computing a mask of possible buckets
			bucket_mask = bucket_conversion.get_possible_bucket_mask()
			mask[b_start:b_end, :] = bucket_mask.copy() * np.ones([batch_size, bucket_count], dtype=arguments.dtype)
		np.save(file_name + '.inputs', inputs)
		np.save(file_name + '.targets', targets)
		np.save(file_name + '.mask', mask)
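Since np.save appends a '.npy' extension, the three files written above can be loaded back with the same prefix (a minimal sketch; file_name is the prefix passed to generate_data_file):

inputs = np.load(file_name + '.inputs.npy')
targets = np.load(file_name + '.targets.npy')
mask = np.load(file_name + '.mask.npy')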
Example #3
    def set_board(self, board):
        ''' Sets the board cards for the bucketer.
		@param: board a non-empty vector of board cards
		ex: BCC = 1 => BC = 6, CC = 6
			buckets = [-1 13 14 -1 16 17]
			self._range_matrix =    	 12 13 14 15 16 17  (indexes)
										  V  V  V  V  V  V
	[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
	 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
	 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
	 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
	 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
	 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
		'''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()  # BC*CC
        CC, bC = game_settings.card_count, self.bucket_count
        self._range_matrix = np.zeros([CC, bC], dtype=arguments.int_dtype)
        class_ids = np.arange(bC, dtype=arguments.int_dtype)
        class_ids = class_ids.reshape([1, bC]) * np.ones([CC, bC],
                                                         dtype=class_ids.dtype)
        buckets = self.bucketer.compute_buckets(
            board)  # [BC*CC, ..., (BC+1)*CC)
        card_buckets = buckets.reshape([CC, 1]) * np.ones([CC, bC],
                                                          dtype=buckets.dtype)
        # finding all strength classes
        # matrix for transformation from card ranges to strength class ranges
        self._range_matrix[class_ids == card_buckets] = 1
        # matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.T.copy()
Example #4
class MockNnTerminal():
    def __init__(self):
        ''' Creates an equity matrix with entries for every possible pair of buckets.
		'''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        bC, CC = self.bucket_count, game_settings.card_count
        self.equity_matrix = np.zeros([bC, bC], dtype=arguments.dtype)
        # filling equity matrix
        boards = card_tools.get_second_round_boards()
        self.board_count = boards.shape[0]
        self.terminal_equity = TerminalEquity()
        for i in range(self.board_count):
            board = boards[i]
            self.terminal_equity.set_board(board)
            call_matrix = self.terminal_equity.get_call_matrix()
            buckets = self.bucketer.compute_buckets(board)
            for c1 in range(CC):
                for c2 in range(CC):
                    b1 = buckets[c1]
                    b2 = buckets[c2]
                    if b1 > 0 and b2 > 0:
                        matrix_entry = call_matrix[c1][c2]
                        self.equity_matrix[b1, b2] = matrix_entry

    def get_value(self, inputs, outputs):
        ''' Gives the expected showdown equity of the two players' ranges.
		@param: inputs An (N,I) tensor containing N instances of neural net inputs.
				See @{net_builder} for details of each input.
		@param: outputs An (N,O) tensor in which to store N sets of expected showdown
				counterfactual values for each player.
		'''
        bC = self.bucket_count
        assert (outputs.ndim == 2)
        bucket_count = outputs.shape[1] // 2
        batch_size = outputs.shape[0]
        player_indexes = [(0, bC), (bC, 2 * bC)]
        players_count = 2
        for player in range(players_count):
            p_start, p_end = player_indexes[player]  # player idx
            o_start, o_end = player_indexes[1 - player]  # opponent idx
            outputs[:, p_start:p_end] = np.dot(inputs[:, o_start:o_end],
                                               self.equity_matrix)
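A minimal usage sketch for MockNnTerminal, assuming the same module-level imports as in the examples above; the batch size and the uniform bucket ranges are hypothetical, and get_value only reads the slices covering both players' buckets:

mock_nn = MockNnTerminal()
bC = mock_nn.bucket_count
N = 4  # hypothetical number of evaluated states
inputs = np.ones([N, 2 * bC], dtype=arguments.dtype) / bC  # a uniform bucket range for each player
outputs = np.zeros([N, 2 * bC], dtype=arguments.dtype)
mock_nn.get_value(inputs, outputs)  # outputs[:, :bC] now holds player 1's expected showdown values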
Example #5
    def __init__(self):
        ''' Creates an equity matrix with entries for every possible pair of buckets.
		'''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        bC, CC = self.bucket_count, game_settings.card_count
        self.equity_matrix = np.zeros([bC, bC], dtype=arguments.dtype)
        # filling equity matrix
        boards = card_tools.get_second_round_boards()
        self.board_count = boards.shape[0]
        self.terminal_equity = TerminalEquity()
        for i in range(self.board_count):
            board = boards[i]
            self.terminal_equity.set_board(board)
            call_matrix = self.terminal_equity.get_call_matrix()
            buckets = self.bucketer.compute_buckets(board)
            for c1 in range(CC):
                for c2 in range(CC):
                    b1 = buckets[c1]
                    b2 = buckets[c2]
                    if b1 > 0 and b2 > 0:
                        matrix_entry = call_matrix[c1][c2]
                        self.equity_matrix[b1, b2] = matrix_entry
Example #6
    def build_net(self):
        ''' Builds a neural net with architecture specified by @{arguments.net}.
		@return a newly constructed neural net
		@return input shape (ex: [224,224,3] if img)
		@return output shape (ex: [10] if 10 classes)
		'''
        # input and output parameters
        bucketer = Bucketer()
        bucket_count = bucketer.get_bucket_count()
        player_count = 2
        num_output = bucket_count * player_count
        num_input = num_output + 1
        input_shape = [num_input]
        output_shape = [num_output]
        # neural network architecture
        m_input = keras.layers.Input(input_shape, name='input')
        # slicing off pot size ([1,2001] -> [1,2000])
        sp = keras.layers.Lambda(lambda x: x[:, :-1],
                                 name='input_ranges')(m_input)
        # feed forward part
        ff = m_input
        for i in range(arguments.num_layers):
            names = [s.format(i) for s in ('dense_{}', 'prelu_{}')]
            ff = keras.layers.Dense(arguments.num_neurons, name=names[0])(ff)
            ff = keras.layers.PReLU(name=names[1])(ff)
        ff = keras.layers.Dense(num_output, name='feed_forward_output')(ff)
        # dot product of both (feed forward and player ranges)
        d = keras.layers.dot([ff, sp], axes=1, name='dot_product')
        # repeat this number from shape [1] -> [2000]
        d = keras.layers.RepeatVector(num_output, name='repeat_scalar')(d)
        d = keras.layers.Flatten(name='flatten')(d)
        # divide it by 2
        d = keras.layers.Lambda(lambda x: x / 2, name='divide_by_2')(d)
        # subtract the correction term from the feed-forward output (zero-sum constraint)
        m_output = keras.layers.subtract([ff, d], name='zero_sum_output')
        model = keras.models.Model(m_input, m_output)
        return model, input_shape, output_shape
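A minimal sketch of using the builder; the enclosing class name NetBuilder and the compile settings are assumptions rather than part of the source:

builder = NetBuilder()
model, input_shape, output_shape = builder.build_net()
model.compile(optimizer='adam', loss='mse')  # placeholder loss; the project may mask invalid buckets in its loss
model.summary()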
Example #7
class NextRoundValue():
    def __init__(self, value_nn):
        ''' Creates a tensor that can translate hand ranges to bucket ranges
			on any board.
		@param: value_nn an Nn.ValueNn object
		'''
        self._values_are_prepared = False
        self.nn = value_nn
        self._init_bucketing()

    def _init_bucketing(self):
        ''' Initializes the tensor that translates hand ranges to bucket ranges.
		'''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        boards = card_tools.get_second_round_boards()
        self.board_count = boards.shape[0]
        CC, BC, bC = game_settings.card_count, self.board_count, self.bucket_count
        self._range_matrix = np.zeros([CC, BC * bC], dtype=arguments.dtype)
        self._range_matrix_board_view = self._range_matrix.reshape(
            [CC, BC, bC])
        for idx in range(BC):
            board = boards[idx]
            buckets = self.bucketer.compute_buckets(board)
            class_ids = np.arange(bC)
            class_ids = class_ids.reshape([1, bC]) * np.ones(
                [CC, bC], dtype=class_ids.dtype)
            card_buckets = buckets.reshape([CC, 1]) * np.ones(
                [CC, bC], dtype=class_ids.dtype)
            # finding all strength classes
            # matrix for transformation from card ranges to strength class ranges
            self._range_matrix_board_view[:, idx, :][class_ids ==
                                                     card_buckets] = 1
        # matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.T.copy()
        # we need to divide the matrix by the number of possible boards
        # (from the point of view of each hand)
        weight_constant = 1 / (BC - 2)  # count
        self._reverse_value_matrix *= weight_constant

    def _card_range_to_bucket_range(self, card_range, bucket_range):
        ''' Converts a range vector over private hands to a range vector over buckets.
		@param: card_range a probability vector over private hands
		@param: bucket_range a vector in which to store the output probabilities
				over buckets
		'''
        bucket_range[:, :] = np.dot(card_range, self._range_matrix)

    def _bucket_value_to_card_value(self, bucket_value, card_value):
        ''' Converts a value vector over buckets to a value vector over private hands.
		@param: bucket_value a value vector over buckets
		@param: card_value a vector in which to store the output values over
				private hands
		'''
        card_value[:, :] = np.dot(bucket_value, self._reverse_value_matrix)

    def _bucket_value_to_card_value_on_board(self, board, bucket_value,
                                             card_value):
        ''' Converts a value vector over buckets to a value vector over
			private hands given a particular set of board cards.
		@param: board a non-empty vector of board cards
		@param: bucket_value a value vector over buckets
		@param: card_value a vector in which to store the output values over
				private hands
		'''
        CC, bC = game_settings.card_count, self.bucket_count
        board_idx = card_tools.get_board_index(board)
        board_matrix = self._range_matrix_board_view[:, board_idx, :].T
        serialized_card_value = card_value.reshape([-1, CC])
        serialized_bucket_value = bucket_value[:, :,
                                               board_idx, :].copy().reshape(
                                                   [-1, bC])
        serialized_card_value[:, :] = np.dot(serialized_bucket_value,
                                             board_matrix)

    def start_computation(self, pot_sizes):
        ''' Initializes the value calculator with the pot size of each state that
			we are going to evaluate.
			During continual re-solving, there is one pot size for each
			initial state of the second betting round (before board cards are dealt).
			(i.e., the point at which the first betting round has ended).
		@param: pot_sizes a vector of pot sizes
		'''
        self.iter = 0
        self.pot_sizes = pot_sizes.reshape([-1, 1]).copy()
        self.batch_size = pot_sizes.shape[0]

    def get_value(self, ranges, values):
        ''' Gives the predicted counterfactual values at each evaluated state,
			given input ranges.
		@{start_computation} must be called first. Each state to be evaluated must
				be given in the same order that pot sizes were given for that function.
				Keeps track of iterations internally, so should be called exactly
				once for every iteration of continual re-solving.
		@param: ranges An (N,2,K) tensor, where N is the number of states evaluated
				(must match input to @{start_computation}), 2 is the number of players,
				and K is the number of private hands. Contains N sets of 2 range vectors.
		@param: values an (N,2,K) tensor in which to store the N sets of 2 value vectors
				which are output
		'''
        PC, BC = constants.players_count, self.board_count
        BS, bC = self.batch_size, self.bucket_count
        assert (ranges is not None and values is not None)
        assert (ranges.shape[0] == self.batch_size)
        self.iter += 1
        if self.iter == 1:
            # initializing data structures
            self.next_round_inputs = np.zeros([BS, BC, bC * PC + 1],
                                              dtype=arguments.dtype)
            self.next_round_values = np.zeros([BS, BC, PC, bC],
                                              dtype=arguments.dtype)
            self.transposed_next_round_values = np.zeros([BS, PC, BC, bC],
                                                         dtype=arguments.dtype)
            self.next_round_extended_range = np.zeros([BS, PC, BC * bC],
                                                      dtype=arguments.dtype)
            self.next_round_serialized_range = self.next_round_extended_range.reshape(
                [-1, bC])
            self.range_normalization = np.zeros([])
            self.value_normalization = np.zeros([BS, PC, BC],
                                                dtype=arguments.dtype)
            # handling pot feature for the nn
            nn_bet_input = self.pot_sizes.copy() * (1 / arguments.stack)
            nn_bet_input = nn_bet_input.reshape([-1, 1]) * np.ones(
                [BS, BC], dtype=nn_bet_input.dtype)
            self.next_round_inputs[:, :, -1] = nn_bet_input.copy()
        # check whether we need to remember anything in this iteration
        use_memory = self.iter > arguments.cfr_skip_iters
        if use_memory and self.iter == arguments.cfr_skip_iters + 1:
            # first iteration where we need to remember something - initialize the data structures
            self.range_normalization_memory = np.zeros([BS * BC * PC, 1],
                                                       dtype=arguments.dtype)
            self.counterfactual_value_memory = np.zeros([BS, PC, BC, bC],
                                                        dtype=arguments.dtype)
        # computing bucket range in next street for both players at once
        self._card_range_to_bucket_range(
            ranges.reshape([BS * PC, -1]),
            self.next_round_extended_range.reshape([BS * PC, -1]))
        self.range_normalization = np.sum(self.next_round_serialized_range,
                                          axis=1,
                                          keepdims=True)
        rn_view = self.range_normalization.reshape([BS, PC, BC])
        for player in range(constants.players_count):
            self.value_normalization[:, player, :] = rn_view[:, 1 -
                                                             player, :].copy()
        if use_memory:
            self.range_normalization_memory += self.value_normalization.reshape(
                self.range_normalization_memory.shape)
        # eliminating division by zero
        self.range_normalization[self.range_normalization == 0] = 1
        self.next_round_serialized_range /= self.range_normalization * np.ones_like(
            self.next_round_serialized_range)
        serialized_range_by_player = self.next_round_serialized_range.reshape(
            [BS, PC, BC, bC])
        for player in range(constants.players_count):
            self.next_round_inputs[:, :, player * bC:(
                player +
                1) * bC] = self.next_round_extended_range[:, player, :].copy(
                ).reshape(self.next_round_inputs[:, :, player *
                                                 bC:(player + 1) * bC].shape)
        # using nn to compute values
        serialized_inputs_view = self.next_round_inputs.reshape([BS * BC, -1])
        serialized_values_view = self.next_round_values.reshape([BS * BC, -1])
        # computing value in the next round
        self.nn.get_value(serialized_inputs_view, serialized_values_view)
        # normalizing values back according to the original range sum
        normalization_view = np.transpose(
            self.value_normalization.reshape([BS, PC, BC, 1]),
            [0, 2, 1, 3])  # :transpose(2,3)
        self.next_round_values *= normalization_view * np.ones_like(
            self.next_round_values)
        self.transposed_next_round_values = np.transpose(
            self.next_round_values, [0, 2, 1, 3]).copy()  # :transpose(3,2)
        # remembering the values for the next round
        if use_memory:
            self.counterfactual_value_memory += self.transposed_next_round_values
        # translating bucket values back to the card values
        self._bucket_value_to_card_value(
            self.transposed_next_round_values.reshape([BS * PC, -1]),
            values.reshape([BS * PC, -1]))

    def get_value_on_board(self, board, values):
        ''' Gives the average counterfactual values on the given board
			across previous calls to @{get_value}.
			Used to update opponent counterfactual values during re-solving
			after board cards are dealt.
		@param: board a non-empty vector of board cards
		@param: values a tensor in which to store the values
		'''
        # check if we have evaluated correct number of iterations
        assert (self.iter == arguments.cfr_iters)
        batch_size = values.shape[0]
        assert (batch_size == self.batch_size)
        self._prepare_next_round_values()
        self._bucket_value_to_card_value_on_board(
            board, self.counterfactual_value_memory, values)

    def _prepare_next_round_values(self):
        ''' Normalizes the counterfactual values remembered between @{get_value}
			calls so that they are an average rather than a sum.
		'''
        bC = self.bucket_count
        assert (self.iter == arguments.cfr_iters)
        # do nothing if already prepared
        if self._values_are_prepared:
            return
        # eliminating division by zero
        self.range_normalization_memory[self.range_normalization_memory ==
                                        0] = 1
        serialized_memory_view = self.counterfactual_value_memory.reshape(
            [-1, bC])
        serialized_memory_view[:, :] /= self.range_normalization_memory * np.ones_like(
            serialized_memory_view)
        self._values_are_prepared = True
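A minimal usage sketch for NextRoundValue, assuming the same module-level imports as above; the pot sizes and the randomly generated ranges are placeholders, since in continual re-solving the ranges come from each CFR iteration:

next_round = NextRoundValue(value_nn)  # value_nn: an already constructed value network
pot_sizes = np.array([50.0, 100.0], dtype=arguments.dtype)
next_round.start_computation(pot_sizes)
CC = game_settings.card_count
for _ in range(arguments.cfr_iters):
    ranges = np.random.rand(2, 2, CC).astype(arguments.dtype)  # (N, players, private hands)
    ranges /= np.sum(ranges, axis=2, keepdims=True)
    values = np.zeros_like(ranges)
    next_round.get_value(ranges, values)
# after cfr_iters calls, the averaged values on a particular board can be queried
board = card_tools.get_second_round_boards()[0]
board_values = np.zeros([2, 2, CC], dtype=arguments.dtype)
next_round.get_value_on_board(board, board_values)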
Example #8
    def generate_data_file(self, data_count):
        ''' Generates data files containing examples of random poker situations with
			counterfactual values from an associated solution.
			Each poker situation is randomly generated using @{range_generator} and
			@{random_card_generator}. For description of neural net input and target
			type, see @{net_builder}.
		@param: data_count the number of examples to generate
		'''
        BS, PC = arguments.gen_batch_size, constants.players_count
        BCC, CC = game_settings.board_card_count, game_settings.card_count
        range_generator = RangeGenerator()
        assert data_count % BS == 0, 'data count has to be divisible by the batch size'
        batch_count = int(data_count / BS)
        bucketer = Bucketer()
        bucket_count = bucketer.get_bucket_count()
        bC = bucket_count
        target_size = bC * PC
        targets = np.zeros([data_count, target_size], dtype=arguments.dtype)
        input_size = bC * PC + 1
        inputs = np.zeros([data_count, input_size], dtype=arguments.dtype)
        masks = np.zeros([data_count, bC], dtype=np.uint8)
        bucket_conversion = BucketConversion()
        for b in tqdm(range(batch_count)):
            board = card_generator.generate_cards(BCC)
            range_generator.set_board(board)
            bucket_conversion.set_board(board)
            # generating ranges
            ranges = np.zeros([PC, BS, CC], dtype=arguments.dtype)
            for player in range(PC):
                range_generator.generate_range(ranges[player])
                # just simple random card generator below
                # prob = np.random.rand(BS,CC)
                # ranges[player] = prob / np.sum(prob, axis=1).reshape([BS,1])
            # generating pot sizes between ante and stack - 0.1
            min_pot = arguments.ante
            max_pot = arguments.stack - 0.1
            pot_range = max_pot - min_pot
            random_pot_sizes = np.random.rand(
                BS, 1) * pot_range + min_pot  # (BS,1)
            # pot features are pot sizes normalized between (ante/stack,1)
            pot_size_features = random_pot_sizes.copy() * (1 / arguments.stack)
            # translating ranges to features
            pot_feature_index = -1
            inputs[b * BS:(b + 1) * BS,
                   pot_feature_index] = pot_size_features.reshape(
                       inputs[b * BS:(b + 1) * BS, pot_feature_index].shape)
            player_indexes = [(0, bC), (bC, bC * 2)]
            for player in range(PC):
                start_idx, end_idx = player_indexes[player]
                bucket_conversion.card_range_to_bucket_range(
                    ranges[player], inputs[b * BS:(b + 1) * BS,
                                           start_idx:end_idx])
            # computation of values using re-solving
            values = np.zeros([PC, BS, CC], dtype=arguments.dtype)
            for i in range(BS):
                resolving = Resolving()
                current_node = Node()
                current_node.board = board
                current_node.street = 2
                current_node.current_player = constants.players.P1
                pot_size = pot_size_features[i][0] * arguments.stack
                current_node.bets = np.array([pot_size, pot_size])
                p1_range = ranges[0][i]
                p2_range = ranges[1][i]
                resolving.resolve_first_node(current_node, p1_range, p2_range)
                root_values = resolving.get_root_cfv_both_players()
                root_values *= 1 / pot_size
                values[:, i, :] = root_values
            # translating values to nn targets
            for player in range(PC):
                start_idx, end_idx = player_indexes[player]
                bucket_conversion.card_range_to_bucket_range(
                    values[player], targets[b * BS:(b + 1) * BS,
                                            start_idx:end_idx])
            # computing a mask of possible buckets
            bucket_mask = bucket_conversion.get_possible_bucket_mask()
            masks[b * BS:(b + 1) *
                  BS, :] = bucket_mask * np.ones([BS, bC], dtype=np.uint8)
        fpath = os.path.join(self.dirpath, '{}.{}')
        np.save(fpath.format('inputs', self.counter),
                inputs.astype(np.float32))
        np.save(fpath.format('targets', self.counter),
                targets.astype(np.float32))
        np.save(fpath.format('masks', self.counter), masks.astype(np.uint8))
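Each shard written above can be loaded back with np.load (np.save appends '.npy'); data_dir and counter below stand in for the generator's dirpath and counter attributes, and the shape checks simply restate the sizes used above:

inputs = np.load(os.path.join(data_dir, 'inputs.{}.npy'.format(counter)))
targets = np.load(os.path.join(data_dir, 'targets.{}.npy'.format(counter)))
masks = np.load(os.path.join(data_dir, 'masks.{}.npy'.format(counter)))
assert inputs.shape == (data_count, 2 * bucket_count + 1)
assert targets.shape == (data_count, 2 * bucket_count)
assert masks.shape == (data_count, bucket_count)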
Example #9
class BucketConversion():
    def __init__(self):
        self.bucketer = None
        self.bucket_count = None
        self._range_matrix = None
        self._reverse_value_matrix = None

    def set_board(self, board):
        ''' Sets the board cards for the bucketer.
		@param: board a non-empty vector of board cards
		ex: BCC = 1 => BC = 6, CC = 6
			buckets = [-1 13 14 -1 16 17]
			self._range_matrix =    	 12 13 14 15 16 17  (indexes)
										  V  V  V  V  V  V
	[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
	 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
	 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
	 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
	 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
	 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
		'''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()  # BC*CC
        CC, bC = game_settings.card_count, self.bucket_count
        self._range_matrix = np.zeros([CC, bC], dtype=arguments.int_dtype)
        class_ids = np.arange(bC, dtype=arguments.int_dtype)
        class_ids = class_ids.reshape([1, bC]) * np.ones([CC, bC],
                                                         dtype=class_ids.dtype)
        buckets = self.bucketer.compute_buckets(
            board)  # [BC*CC, ..., (BC+1)*CC)
        card_buckets = buckets.reshape([CC, 1]) * np.ones([CC, bC],
                                                          dtype=buckets.dtype)
        # finding all strength classes
        # matrix for transformation from card ranges to strength class ranges
        self._range_matrix[class_ids == card_buckets] = 1
        # matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.T.copy()

    def card_range_to_bucket_range(self, card_range, bucket_range):
        ''' Converts a range vector over private hands to a range vector over buckets.
		@{set_board} must be called first. Used to create inputs to the neural net.
		@param: card_range (1, CC) a probability vector over private hands
		@param: bucket_range (1,bC) a vector in which to save the resulting probability
				vector over buckets
		'''
        bucket_range[:, :] = np.dot(card_range, self._range_matrix)

    def bucket_value_to_card_value(self, bucket_value, card_value):
        ''' Converts a value vector over buckets to a value vector over private hands.
		@{set_board} must be called first. Used to process neural net outputs.
		@param: bucket_value a vector of values over buckets
		@param: card_value a vector in which to save the resulting vector of values
				over private hands
		'''
        card_value[:, :] = np.dot(bucket_value, self._reverse_value_matrix)

    def get_possible_bucket_mask(self):
        ''' Gives a vector of possible buckets on the board.
		@{set_board} must be called first.
		@return a mask vector over buckets where each entry is 1 if the bucket is
				valid, 0 if not
		'''
        # CC = game_settings.card_count
        # mask = np.zeros([1, self.bucket_count], dtype=arguments.dtype)
        # card_indicator = np.ones([1,CC], dtype=arguments.dtype)
        # mask = np.dot(card_indicator, self._range_matrix)
        mask = np.sum(self._range_matrix, axis=0,
                      keepdims=True)  # (1,self.bucket_count)
        return mask
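A minimal end-to-end sketch for BucketConversion, assuming the same module-level imports as in the examples above; the generated board and the uniform range are placeholders:

conversion = BucketConversion()
board = card_generator.generate_cards(game_settings.board_card_count)  # any non-empty board
conversion.set_board(board)
CC, bC = game_settings.card_count, conversion.bucket_count
card_range = np.full([1, CC], 1.0 / CC, dtype=arguments.dtype)  # uniform range over private hands
bucket_range = np.zeros([1, bC], dtype=arguments.dtype)
conversion.card_range_to_bucket_range(card_range, bucket_range)
mask = conversion.get_possible_bucket_mask()  # (1, bC) vector, 1 where a bucket is reachable on this board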