def _init_bucketing(self):
    '''
    Builds the tensors that translate hand ranges to bucket ranges (and
    bucket values back to hand values) for every second-round board.

    Sets on self:
      bucketer, bucket_count, board_count
      _range_matrix             (CC, BC*bC) matrix with a 1 wherever a card
                                falls into a bucket on a given board
      _range_matrix_board_view  the same memory seen as (CC, BC, bC)
      _reverse_value_matrix     transposed copy of _range_matrix scaled by
                                1 / (BC - 2)
    '''
    self.bucketer = Bucketer()
    self.bucket_count = self.bucketer.get_bucket_count()
    all_boards = card_tools.get_second_round_boards()
    self.board_count = all_boards.shape[0]
    CC = game_settings.card_count
    BC = self.board_count
    bC = self.bucket_count
    self._range_matrix = np.zeros([CC, BC * bC], dtype=arguments.dtype)
    # reshaping the freshly allocated matrix gives a view, so writes through
    # the per-board view land in _range_matrix itself
    self._range_matrix_board_view = self._range_matrix.reshape([CC, BC, bC])
    # row vector of bucket ids; broadcast against each board's card buckets
    bucket_id_row = np.arange(bC).reshape([1, bC])
    for board_idx, board in enumerate(all_boards):
        card_bucket_col = self.bucketer.compute_buckets(board).reshape([CC, 1])
        # (CC, bC) boolean table: True where the card's bucket is that id;
        # cards whose bucket matches no id keep an all-zero row
        belongs = card_bucket_col == bucket_id_row
        # matrix for transformation from card ranges to strength class ranges
        self._range_matrix_board_view[:, board_idx, :][belongs] = 1
    # matrix for transformation from class values to card values
    self._reverse_value_matrix = self._range_matrix.T.copy()
    # values are averaged over the boards possible from each hand's point of
    # view; the code takes that count to be BC - 2
    # NOTE(review): confirm BC - 2 is the right count for the configured game
    board_weight = 1 / (BC - 2)
    self._reverse_value_matrix *= board_weight
def generate_data_file(self, data_count, file_name):
    '''
    Generates data files containing examples of random poker situations with
    associated terminal equity.

    Each poker situation is randomly generated using @{range_generator} and
    @{random_card_generator}. For description of neural net input and target
    type, see @{net_builder}.

    Row layout (bucket_count = bC):
      inputs  : [player 0 bucket range (bC) | player 1 bucket range (bC) | pot]
      targets : [player 0 bucket values (bC) | player 1 bucket values (bC)]
      mask    : possible-bucket mask of the board used for that row's batch

    @param: data_count the number of examples to generate; must be a multiple
            of arguments.gen_batch_size
    @param: file_name the prefix of the files where the data is saved
            (appended with `.inputs`, `.targets`, and `.mask`; np.save adds
            the `.npy` extension)
    @raise ValueError if data_count is not divisible by the batch size
    '''
    range_generator = RangeGenerator()
    batch_size = arguments.gen_batch_size
    # explicit check: `assert(cond, msg)` asserts a non-empty tuple and
    # therefore never fails
    if data_count % batch_size != 0:
        raise ValueError('data count has to be divisible by the batch size')
    # integer division so the result can be fed to range()
    batch_count = data_count // batch_size
    bucketer = Bucketer()
    bucket_count = bucketer.get_bucket_count()
    player_count = 2
    target_size = bucket_count * player_count
    targets = np.zeros([data_count, target_size], dtype=arguments.dtype)
    input_size = bucket_count * player_count + 1
    inputs = np.zeros([data_count, input_size], dtype=arguments.dtype)
    mask = np.zeros([data_count, bucket_count], dtype=arguments.dtype)
    bucket_conversion = BucketConversion()
    equity = TerminalEquity()
    # the pot feature lives in the last input column
    pot_feature_index = -1
    # half-open, 0-based column ranges of each player's buckets
    player_indexes = [(0, bucket_count), (bucket_count, bucket_count * 2)]
    for batch in range(batch_count):
        board = card_generator.generate_cards(game_settings.board_card_count)
        range_generator.set_board(board)
        bucket_conversion.set_board(board)
        equity.set_board(board)
        # generating ranges
        ranges = np.zeros([player_count, batch_size, game_settings.card_count],
                          dtype=arguments.dtype)
        for player in range(player_count):
            range_generator.generate_range(ranges[player])
        # generating pot features
        pot_sizes = np.random.rand(batch_size)
        # half-open, 0-based row range of this batch
        b_start, b_end = batch * batch_size, (batch + 1) * batch_size
        # translating ranges to features
        inputs[b_start:b_end, pot_feature_index] = pot_sizes
        for player in range(player_count):
            p_start, p_end = player_indexes[player]
            # basic slicing yields a view, so the conversion writes straight
            # into `inputs`
            bucket_conversion.card_range_to_bucket_range(
                ranges[player], inputs[b_start:b_end, p_start:p_end])
        # computation of values using terminal equity
        values = np.zeros([player_count, batch_size, game_settings.card_count],
                          dtype=arguments.dtype)
        for player in range(player_count):
            opponent = 1 - player
            equity.call_value(ranges[opponent], values[player])
        # translating values to nn targets
        for player in range(player_count):
            p_start, p_end = player_indexes[player]
            bucket_conversion.card_range_to_bucket_range(
                values[player], targets[b_start:b_end, p_start:p_end])
        # computing a mask of possible buckets
        bucket_mask = bucket_conversion.get_possible_bucket_mask()
        mask[b_start:b_end, :] = bucket_mask * np.ones(
            [batch_size, bucket_count], dtype=arguments.dtype)
    np.save(file_name + '.inputs', inputs)
    np.save(file_name + '.targets', targets)
    np.save(file_name + '.mask', mask)
def set_board(self, board):
    '''
    Sets the board cards for the bucketer and rebuilds both conversion
    matrices for that board.

    After the call:
      self._range_matrix          (CC, bC) integer matrix, entry [c, b] is 1
                                  when card c is assigned bucket b
      self._reverse_value_matrix  (bC, CC) transposed copy of the above

    Cards whose bucket value matches no bucket id keep an all-zero row.
    Example: buckets = [-1, 13, 14, -1, 16, 17] puts ones at
    (1,13), (2,14), (4,16), (5,17) and leaves rows 0 and 3 empty.

    @param: board a non-empty vector of board cards
    '''
    self.bucketer = Bucketer()
    self.bucket_count = self.bucketer.get_bucket_count()
    CC = game_settings.card_count
    bC = self.bucket_count
    self._range_matrix = np.zeros([CC, bC], dtype=arguments.int_dtype)
    # row of all bucket ids, shape (1, bC)
    bucket_id_row = np.arange(bC, dtype=arguments.int_dtype).reshape([1, bC])
    # bucket of every card on this board, as a column of shape (CC, 1)
    card_bucket_col = self.bucketer.compute_buckets(board).reshape([CC, 1])
    # broadcasting column against row compares every card with every bucket
    belongs = card_bucket_col == bucket_id_row
    # matrix for transformation from card ranges to strength class ranges
    self._range_matrix[belongs] = 1
    # matrix for transformation from class values to card values
    self._reverse_value_matrix = self._range_matrix.T.copy()
class MockNnTerminal():
    '''
    Stand-in for a value network: answers value queries with a fixed
    bucket-vs-bucket equity matrix instead of a trained net.
    '''

    def __init__(self):
        '''
        Creates an equity matrix with entries for every possible pair of
        buckets, filled from the terminal call matrix of each second-round
        board.
        '''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        bC = self.bucket_count
        CC = game_settings.card_count
        self.equity_matrix = np.zeros([bC, bC], dtype=arguments.dtype)
        # filling equity matrix
        boards = card_tools.get_second_round_boards()
        self.board_count = boards.shape[0]
        self.terminal_equity = TerminalEquity()
        for board_idx in range(self.board_count):
            board = boards[board_idx]
            self.terminal_equity.set_board(board)
            call_matrix = self.terminal_equity.get_call_matrix()
            buckets = self.bucketer.compute_buckets(board)
            for first_card in range(CC):
                for second_card in range(CC):
                    first_bucket = buckets[first_card]
                    second_bucket = buckets[second_card]
                    # only strictly positive buckets are recorded
                    # NOTE(review): this also skips bucket id 0 — confirm
                    # that 0 is never a valid bucket here
                    if not (first_bucket > 0 and second_bucket > 0):
                        continue
                    self.equity_matrix[first_bucket, second_bucket] = \
                        call_matrix[first_card][second_card]

    def get_value(self, inputs, outputs):
        '''
        Gives the expected showdown equity of the two players' ranges.

        Each player's output block is the opponent's input block multiplied
        by the equity matrix; the result is written into `outputs` in place.

        @param: inputs An (N,I) tensor containing N instances of neural net
                inputs. See @{net_builder} for details of each input.
        @param: outputs An (N,O) tensor in which to store N sets of expected
                showdown counterfactual values for each player.
        '''
        assert (outputs.ndim == 2)
        bC = self.bucket_count
        # column range of each player's bucket block
        player_indexes = [(0, bC), (bC, 2 * bC)]
        for player, (p_start, p_end) in enumerate(player_indexes):
            o_start, o_end = player_indexes[1 - player]  # opponent idx
            opponent_range = inputs[:, o_start:o_end]
            outputs[:, p_start:p_end] = np.dot(opponent_range,
                                               self.equity_matrix)
def __init__(self):
    '''
    Creates an equity matrix with entries for every possible pair of
    buckets, filled from the terminal call matrix of each second-round board.
    '''
    self.bucketer = Bucketer()
    self.bucket_count = self.bucketer.get_bucket_count()
    bC = self.bucket_count
    CC = game_settings.card_count
    self.equity_matrix = np.zeros([bC, bC], dtype=arguments.dtype)
    # filling equity matrix
    boards = card_tools.get_second_round_boards()
    self.board_count = boards.shape[0]
    self.terminal_equity = TerminalEquity()
    for board_idx in range(self.board_count):
        board = boards[board_idx]
        self.terminal_equity.set_board(board)
        call_matrix = self.terminal_equity.get_call_matrix()
        buckets = self.bucketer.compute_buckets(board)
        for first_card in range(CC):
            for second_card in range(CC):
                first_bucket = buckets[first_card]
                second_bucket = buckets[second_card]
                # only strictly positive buckets are recorded
                # NOTE(review): this also skips bucket id 0 — confirm that
                # 0 is never a valid bucket here
                if not (first_bucket > 0 and second_bucket > 0):
                    continue
                self.equity_matrix[first_bucket, second_bucket] = \
                    call_matrix[first_card][second_card]
def build_net(self):
    '''
    Builds a neural net with architecture specified by @{arguments.net}.

    The input is both players' bucket ranges followed by one pot feature.
    A stack of Dense + PReLU layers produces a raw value per bucket and
    player; the final output subtracts half of the dot product between those
    raw values and the input ranges from every raw value.

    @return a newly constructed neural net
    @return input shape (ex: [224,224,3] if img)
    @return output shape (ex: [10] if 10 classes)
    '''
    # input and output parameters
    player_count = 2
    bucket_count = Bucketer().get_bucket_count()
    n_out = bucket_count * player_count
    n_in = n_out + 1  # ranges of both players plus the pot feature
    input_shape, output_shape = [n_in], [n_out]
    # neural network architecture
    net_input = keras.layers.Input(input_shape, name='input')
    # slicing off pot size ([1,2001] -> [1,2000])
    ranges_only = keras.layers.Lambda(lambda x: x[:, :-1],
                                      name='input_ranges')(net_input)
    # feed forward part (sees the full input, pot feature included)
    hidden = net_input
    for layer_idx in range(arguments.num_layers):
        dense_name = 'dense_{}'.format(layer_idx)
        prelu_name = 'prelu_{}'.format(layer_idx)
        hidden = keras.layers.Dense(arguments.num_neurons,
                                    name=dense_name)(hidden)
        hidden = keras.layers.PReLU(name=prelu_name)(hidden)
    raw_values = keras.layers.Dense(n_out, name='feed_forward_output')(hidden)
    # dot product of both (feed forward and player ranges)
    correction = keras.layers.dot([raw_values, ranges_only], axes=1,
                                  name='dot_product')
    # repeat this number from shape [1] -> [2000]
    correction = keras.layers.RepeatVector(n_out,
                                           name='repeat_scalar')(correction)
    correction = keras.layers.Flatten(name='flatten')(correction)
    # divide it by 2
    correction = keras.layers.Lambda(lambda x: x / 2,
                                     name='divide_by_2')(correction)
    # subtract the halved dot product from the feed forward output
    net_output = keras.layers.subtract([raw_values, correction],
                                       name='zero_sum_output')
    model = keras.models.Model(net_input, net_output)
    return model, input_shape, output_shape
class NextRoundValue():
    '''
    Uses a value network to estimate counterfactual values at the start of
    the next betting round, for every possible board at once.
    '''

    def __init__(self, value_nn):
        '''
        Creates a tensor that can translate hand ranges to bucket ranges on
        any board.
        @param: Nn.ValueNn object
        '''
        self._values_are_prepared = False
        self.nn = value_nn
        self._init_bucketing()

    def _init_bucketing(self):
        '''
        Initializes the tensor that translates hand ranges to bucket ranges.

        Sets _range_matrix (CC, BC*bC), its (CC, BC, bC) view
        _range_matrix_board_view, and _reverse_value_matrix, the transposed
        copy scaled by 1 / (BC - 2).
        '''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        all_boards = card_tools.get_second_round_boards()
        self.board_count = all_boards.shape[0]
        CC = game_settings.card_count
        BC = self.board_count
        bC = self.bucket_count
        self._range_matrix = np.zeros([CC, BC * bC], dtype=arguments.dtype)
        # a view: writes through it land in _range_matrix
        self._range_matrix_board_view = self._range_matrix.reshape(
            [CC, BC, bC])
        # row vector of bucket ids, broadcast against each board's buckets
        bucket_id_row = np.arange(bC).reshape([1, bC])
        for board_idx, board in enumerate(all_boards):
            card_bucket_col = self.bucketer.compute_buckets(board).reshape(
                [CC, 1])
            # (CC, bC) boolean table: True where the card's bucket is that id
            belongs = card_bucket_col == bucket_id_row
            # matrix for transformation from card ranges to strength class
            # ranges
            self._range_matrix_board_view[:, board_idx, :][belongs] = 1
        # matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.T.copy()
        # we need to div the matrix by the sum of possible boards
        # (from point of view of each hand); the code takes it to be BC - 2
        board_weight = 1 / (BC - 2)
        self._reverse_value_matrix *= board_weight

    def _card_range_to_bucket_range(self, card_range, bucket_range):
        '''
        Converts a range vector over private hands to a range vector over
        buckets. The result is written into `bucket_range` in place.
        @param: card_range a probability vector over private hands
        @param: bucket_range a vector in which to store the output
                probabilities over buckets
        '''
        converted = np.dot(card_range, self._range_matrix)
        bucket_range[:, :] = converted

    def _bucket_value_to_card_value(self, bucket_value, card_value):
        '''
        Converts a value vector over buckets to a value vector over private
        hands. The result is written into `card_value` in place.
        @param: bucket_value a value vector over buckets
        @param: card_value a vector in which to store the output values over
                private hands
        '''
        converted = np.dot(bucket_value, self._reverse_value_matrix)
        card_value[:, :] = converted

    def _bucket_value_to_card_value_on_board(self, board, bucket_value,
                                             card_value):
        '''
        Converts a value vector over buckets to a value vector over private
        hands given a particular set of board cards.
        @param: board a non-empty vector of board cards
        @param: bucket_value a value vector over buckets
        @param: card_value a vector in which to store the output values over
                private hands
        '''
        CC = game_settings.card_count
        bC = self.bucket_count
        board_idx = card_tools.get_board_index(board)
        # (bC, CC) bucket-to-card matrix of this single board (unscaled)
        bucket_to_card = self._range_matrix_board_view[:, board_idx, :].T
        # NOTE(review): the result only reaches the caller if this reshape
        # returns a view of card_value rather than a copy
        flat_card_value = card_value.reshape([-1, CC])
        flat_bucket_value = bucket_value[:, :, board_idx, :].copy().reshape(
            [-1, bC])
        flat_card_value[:, :] = np.dot(flat_bucket_value, bucket_to_card)

    def start_computation(self, pot_sizes):
        '''
        Initializes the value calculator with the pot size of each state that
        we are going to evaluate.

        During continual re-solving, there is one pot size for each initial
        state of the second betting round (before board cards are dealt).
        Resets the iteration counter and stores a column-vector copy of the
        pot sizes.
        @param pot_sizes a vector of pot sizes
        '''
        self.iter = 0
        self.pot_sizes = pot_sizes.reshape([-1, 1]).copy()
        self.batch_size = pot_sizes.shape[0]

    def get_value(self, ranges, values):
        '''
        Gives the predicted counterfactual values at each evaluated state,
        given input ranges.

        @{start_computation} must be called first. Each state to be evaluated
        must be given in the same order that pot sizes were given for that
        function. Keeps track of iterations internally, so should be called
        exactly once for every iteration of continual re-solving.

        @param: ranges An (N,2,K) tensor, where N is the number of states
                evaluated (must match input to @{start_computation}), 2 is
                the number of players, and K is the number of private hands.
                Contains N sets of 2 range vectors.
        @param: values an (N,2,K) tensor in which to store the N sets of 2
                value vectors which are output
        '''
        PC = constants.players_count
        BC = self.board_count
        BS = self.batch_size
        bC = self.bucket_count
        assert (ranges is not None and values is not None)
        assert (ranges.shape[0] == self.batch_size)
        self.iter += 1
        if self.iter == 1:
            # first call after start_computation: allocate working buffers
            self.next_round_inputs = np.zeros([BS, BC, bC * PC + 1],
                                              dtype=arguments.dtype)
            self.next_round_values = np.zeros([BS, BC, PC, bC],
                                              dtype=arguments.dtype)
            self.transposed_next_round_values = np.zeros(
                [BS, PC, BC, bC], dtype=arguments.dtype)
            self.next_round_extended_range = np.zeros(
                [BS, PC, BC * bC], dtype=arguments.dtype)
            # one row per (state, player, board); shares memory with the
            # extended range
            self.next_round_serialized_range = \
                self.next_round_extended_range.reshape([-1, bC])
            self.range_normalization = np.zeros([])
            self.value_normalization = np.zeros([BS, PC, BC],
                                                dtype=arguments.dtype)
            # handling pot feature for the nn: pot / stack in the last input
            # column, broadcast over all boards of a state
            scaled_pots = self.pot_sizes.copy() * (1 / arguments.stack)
            self.next_round_inputs[:, :, -1] = scaled_pots.reshape([-1, 1])
        # we need to find if we need remember something in this iteration
        use_memory = self.iter > arguments.cfr_skip_iters
        first_remembered_iter = arguments.cfr_skip_iters + 1
        if use_memory and self.iter == first_remembered_iter:
            # first iter that we need to remember something - we need to init
            # data structures
            self.range_normalization_memory = np.zeros(
                [BS * BC * PC, 1], dtype=arguments.dtype)
            self.counterfactual_value_memory = np.zeros(
                [BS, PC, BC, bC], dtype=arguments.dtype)
        # computing bucket range in next street for both players at once
        flat_card_ranges = ranges.reshape([BS * PC, -1])
        flat_bucket_ranges = self.next_round_extended_range.reshape(
            [BS * PC, -1])
        self._card_range_to_bucket_range(flat_card_ranges, flat_bucket_ranges)
        # total range mass per (state, player, board)
        self.range_normalization = np.sum(self.next_round_serialized_range,
                                          axis=1, keepdims=True)
        mass_by_player = self.range_normalization.reshape([BS, PC, BC])
        # each player's values are later scaled by the opponent's range mass;
        # copied here, before zeros are replaced below
        for player in range(PC):
            opponent = 1 - player
            self.value_normalization[:, player, :] = \
                mass_by_player[:, opponent, :].copy()
        if use_memory:
            self.range_normalization_memory += \
                self.value_normalization.reshape(
                    self.range_normalization_memory.shape)
        # eliminating division by zero
        self.range_normalization[self.range_normalization == 0] = 1
        # in-place: also normalizes next_round_extended_range (shared memory)
        self.next_round_serialized_range /= self.range_normalization
        for player in range(PC):
            block = slice(player * bC, (player + 1) * bC)
            target_shape = self.next_round_inputs[:, :, block].shape
            player_range = self.next_round_extended_range[:, player, :].copy()
            self.next_round_inputs[:, :, block] = player_range.reshape(
                target_shape)
        # using nn to compute values; both reshapes are views, so the net
        # writes directly into next_round_values
        flat_inputs = self.next_round_inputs.reshape([BS * BC, -1])
        flat_values = self.next_round_values.reshape([BS * BC, -1])
        # computing value in the next round
        self.nn.get_value(flat_inputs, flat_values)
        # normalizing values back according to the original range sum
        scale = np.transpose(
            self.value_normalization.reshape([BS, PC, BC, 1]), [0, 2, 1, 3])
        self.next_round_values *= scale
        self.transposed_next_round_values = np.transpose(
            self.next_round_values, [0, 2, 1, 3]).copy()
        # remembering the values for the next round
        if use_memory:
            self.counterfactual_value_memory += \
                self.transposed_next_round_values
        # translating bucket values back to the card values
        # NOTE(review): `values` only receives the result if its reshape
        # returns a view rather than a copy
        self._bucket_value_to_card_value(
            self.transposed_next_round_values.reshape([BS * PC, -1]),
            values.reshape([BS * PC, -1]))

    def get_value_on_board(self, board, values):
        '''
        Gives the average counterfactual values on the given board across
        previous calls to @{get_value}.

        Used to update opponent counterfactual values during re-solving after
        board cards are dealt.
        @param: board a non-empty vector of board cards
        @param: values a tensor in which to store the values
        '''
        # check if we have evaluated correct number of iterations
        assert (self.iter == arguments.cfr_iters)
        assert (values.shape[0] == self.batch_size)
        self._prepare_next_round_values()
        self._bucket_value_to_card_value_on_board(
            board, self.counterfactual_value_memory, values)

    def _prepare_next_round_values(self):
        '''
        Normalizes the counterfactual values remembered between @{get_value}
        calls so that they are an average rather than a sum.

        Runs the division at most once; later calls return immediately.
        '''
        assert (self.iter == arguments.cfr_iters)
        # do nothing if already prepared
        if self._values_are_prepared:
            return
        bC = self.bucket_count
        # eliminating division by zero
        zero_mass = self.range_normalization_memory == 0
        self.range_normalization_memory[zero_mass] = 1
        # view over the memory: the in-place division updates
        # counterfactual_value_memory itself
        flat_memory = self.counterfactual_value_memory.reshape([-1, bC])
        flat_memory /= self.range_normalization_memory
        self._values_are_prepared = True
def generate_data_file(self, data_count):
    '''
    Generates data files containing examples of random poker situations with
    counterfactual values from an associated solution.

    Each poker situation is randomly generated using @{range_generator} and
    @{random_card_generator}. For description of neural net input and target
    type, see @{net_builder}.

    Row layout (bC = bucket count):
      inputs  : [player 0 bucket range (bC) | player 1 bucket range (bC) |
                 pot size / stack]
      targets : [player 0 bucket values (bC) | player 1 bucket values (bC)],
                root counterfactual values divided by the pot size
      masks   : possible-bucket mask of the board used for that row's batch

    The arrays are saved with np.save under self.dirpath as
    `inputs.<counter>`, `targets.<counter>` and `masks.<counter>`, where
    <counter> is self.counter.

    @param: data_count the number of examples to generate; must be a multiple
            of arguments.gen_batch_size
    @raise ValueError if data_count is not divisible by the batch size
    '''
    BS, PC = arguments.gen_batch_size, constants.players_count
    BCC, CC = game_settings.board_card_count, game_settings.card_count
    range_generator = RangeGenerator()
    # explicit check: `assert (cond, msg)` asserts a non-empty tuple and
    # never fails, which silently left trailing all-zero rows
    if data_count % BS != 0:
        raise ValueError('data count has to be divisible by the batch size')
    batch_count = data_count // BS
    bucketer = Bucketer()
    bucket_count = bucketer.get_bucket_count()
    bC = bucket_count
    target_size = bC * PC
    targets = np.zeros([data_count, target_size], dtype=arguments.dtype)
    input_size = bC * PC + 1
    inputs = np.zeros([data_count, input_size], dtype=arguments.dtype)
    masks = np.zeros([data_count, bC], dtype=np.uint8)
    bucket_conversion = BucketConversion()
    # the pot feature lives in the last input column
    pot_feature_index = -1
    # half-open column ranges of each player's buckets
    player_indexes = [(0, bC), (bC, bC * 2)]
    # generating pot sizes between ante and stack - 0.1
    min_pot = arguments.ante
    max_pot = arguments.stack - 0.1
    pot_range = max_pot - min_pot
    for b in tqdm(range(batch_count)):
        # rows of this batch; basic slicing below yields writable views
        rows = slice(b * BS, (b + 1) * BS)
        board = card_generator.generate_cards(BCC)
        range_generator.set_board(board)
        bucket_conversion.set_board(board)
        # generating ranges
        ranges = np.zeros([PC, BS, CC], dtype=arguments.dtype)
        for player in range(PC):
            range_generator.generate_range(ranges[player])
        random_pot_sizes = np.random.rand(BS, 1) * pot_range + min_pot  # (BS,1)
        # pot features are pot sizes normalized between (ante/stack,1)
        pot_size_features = random_pot_sizes.copy() * (1 / arguments.stack)
        # translating ranges to features
        inputs[rows, pot_feature_index] = pot_size_features.reshape(
            inputs[rows, pot_feature_index].shape)
        for player in range(PC):
            start_idx, end_idx = player_indexes[player]
            bucket_conversion.card_range_to_bucket_range(
                ranges[player], inputs[rows, start_idx:end_idx])
        # computation of values using re-solving, one situation at a time
        values = np.zeros([PC, BS, CC], dtype=arguments.dtype)
        for i in range(BS):
            resolving = Resolving()
            current_node = Node()
            current_node.board = board
            current_node.street = 2
            current_node.current_player = constants.players.P1
            # pot size recovered from the normalized feature
            pot_size = pot_size_features[i][0] * arguments.stack
            current_node.bets = np.array([pot_size, pot_size])
            p1_range = ranges[0][i]
            p2_range = ranges[1][i]
            resolving.resolve_first_node(current_node, p1_range, p2_range)
            root_values = resolving.get_root_cfv_both_players()
            # targets are expressed relative to the pot size
            root_values *= 1 / pot_size
            values[:, i, :] = root_values
        # translating values to nn targets
        for player in range(PC):
            start_idx, end_idx = player_indexes[player]
            bucket_conversion.card_range_to_bucket_range(
                values[player], targets[rows, start_idx:end_idx])
        # computing a mask of possible buckets
        bucket_mask = bucket_conversion.get_possible_bucket_mask()
        masks[rows, :] = bucket_mask * np.ones([BS, bC], dtype=np.uint8)
    fpath = os.path.join(self.dirpath, '{}.{}')
    np.save(fpath.format('inputs', self.counter), inputs.astype(np.float32))
    np.save(fpath.format('targets', self.counter), targets.astype(np.float32))
    np.save(fpath.format('masks', self.counter), masks.astype(np.uint8))
class BucketConversion():
    '''
    Converts between vectors over private hands and vectors over buckets for
    one board at a time. @{set_board} must be called before any conversion.
    '''

    def __init__(self):
        '''
        Creates an unconfigured converter; every attribute stays None until
        @{set_board} is called.
        '''
        self.bucketer = None
        self.bucket_count = None
        self._range_matrix = None
        self._reverse_value_matrix = None

    def set_board(self, board):
        '''
        Sets the board cards for the bucketer and rebuilds both conversion
        matrices for that board.

        After the call:
          self._range_matrix          (CC, bC) integer matrix, entry [c, b]
                                      is 1 when card c is assigned bucket b
          self._reverse_value_matrix  (bC, CC) transposed copy of the above

        Cards whose bucket value matches no bucket id keep an all-zero row.
        Example: buckets = [-1, 13, 14, -1, 16, 17] puts ones at
        (1,13), (2,14), (4,16), (5,17) and leaves rows 0 and 3 empty.

        @param: board a non-empty vector of board cards
        '''
        self.bucketer = Bucketer()
        self.bucket_count = self.bucketer.get_bucket_count()
        CC = game_settings.card_count
        bC = self.bucket_count
        self._range_matrix = np.zeros([CC, bC], dtype=arguments.int_dtype)
        # row of all bucket ids, shape (1, bC)
        bucket_id_row = np.arange(
            bC, dtype=arguments.int_dtype).reshape([1, bC])
        # bucket of every card on this board, as a column of shape (CC, 1)
        card_bucket_col = self.bucketer.compute_buckets(board).reshape(
            [CC, 1])
        # broadcasting column against row compares every card with every
        # bucket
        belongs = card_bucket_col == bucket_id_row
        # matrix for transformation from card ranges to strength class ranges
        self._range_matrix[belongs] = 1
        # matrix for transformation from class values to card values
        self._reverse_value_matrix = self._range_matrix.T.copy()

    def card_range_to_bucket_range(self, card_range, bucket_range):
        '''
        Converts a range vector over private hands to a range vector over
        buckets.

        @{set_board} must be called first. Used to create inputs to the
        neural net. The result is written into `bucket_range` in place.
        @param: card_range (1, CC) a probability vector over private hands
        @param: bucket_range (1,bC) a vector in which to save the resulting
                probability vector over buckets
        '''
        converted = np.dot(card_range, self._range_matrix)
        bucket_range[:, :] = converted

    def bucket_value_to_card_value(self, bucket_value, card_value):
        '''
        Converts a value vector over buckets to a value vector over private
        hands.

        @{set_board} must be called first. Used to process neural net
        outputs. The result is written into `card_value` in place.
        @param: bucket_value a vector of values over buckets
        @param: card_value a vector in which to save the resulting vector of
                values over private hands
        '''
        converted = np.dot(bucket_value, self._reverse_value_matrix)
        card_value[:, :] = converted

    def get_possible_bucket_mask(self):
        '''
        Gives a vector of possible buckets on the board.

        @{set_board} must be called first. Each entry is the column sum of
        the range matrix, i.e. the number of cards assigned to that bucket.
        @return a (1, bC) vector over buckets that is non-zero where the
                bucket is reachable on the board and 0 where it is not
        '''
        column_totals = np.sum(self._range_matrix, axis=0, keepdims=True)
        return column_totals