def evaluate_state(self, env: MancalaEnv) -> (float, float):
    """Evaluate the current position of ``env`` with the network.

    The board image is requested in flipped form when ``Side.NORTH`` is the
    side to move, and the action mask is taken from
    ``env.get_action_mask_with_no_pie()``. Both are passed to
    ``self.network.evaluate_move``.

    Returns:
        A 2-tuple: the first item of the network result, passed through
        unchanged, and the third item converted with ``float()``. The
        middle item of the network result is discarded.

    NOTE(review): the return annotation is a plain tuple of types and
    declares the first element as ``float``, but that element is returned
    exactly as the network produced it -- presumably a distribution over
    actions. Confirm against ``network.evaluate_move`` and correct the
    annotation.
    """
    # Presumably the flip presents the board from the mover's point of
    # view -- verify against ``get_board_image``.
    north_to_move = env.side_to_move == Side.NORTH
    board_image = env.board.get_board_image(flipped=north_to_move)
    action_mask = env.get_action_mask_with_no_pie()

    network_output = self.network.evaluate_move(
        state=board_image,
        mask=action_mask,
    )
    # Exactly three items are expected; the second one is not used here.
    distribution, _, raw_value = network_output
    return distribution, float(raw_value)
def sample_state(self, env: MancalaEnv) -> (int, float):
    """Sample from the network for the current position of ``env``.

    The board image is requested in flipped form when ``Side.NORTH`` is the
    side to move, and the action mask is taken from
    ``env.get_action_mask_with_no_pie()``. Both are passed to
    ``self.network.sample``.

    Returns:
        Whatever ``self.network.sample`` returns, unchanged. The annotation
        suggests an ``(int, float)`` pair -- presumably a sampled action
        and a value estimate; confirm against ``network.sample``.

    NOTE(review): the return annotation is a plain tuple of types rather
    than a typing construct.
    """
    # Presumably the flip presents the board from the mover's point of
    # view -- verify against ``get_board_image``.
    north_to_move = env.side_to_move == Side.NORTH
    board_image = env.board.get_board_image(flipped=north_to_move)
    action_mask = env.get_action_mask_with_no_pie()

    sampled = self.network.sample(state=board_image, mask=action_mask)
    return sampled