def test_project_log_stochastic_matrix_wrt_kl_divergence(self):
  """Tests KL-divergence projection routine on some known values."""
  matrix = standard_ops.constant([[0.2, 0.8, 0.6], [0.1, 0.2, 1.5],
                                  [0.2, 1.0, 0.9]])
  expected_projected_matrix = np.array([[0.4, 0.4, 0.2], [0.2, 0.1, 0.5],
                                        [0.4, 0.5, 0.3]])

  with self.cached_session() as session:
    projected_matrix = session.run(
        standard_ops.exp(
            swap_regret_optimizer.
            _project_log_stochastic_matrix_wrt_kl_divergence(
                standard_ops.log(matrix))))

  self.assertAllClose(
      expected_projected_matrix, projected_matrix, rtol=0, atol=1e-6)
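The expected values in this test can be sanity-checked by hand: for an entrywise-positive matrix, the column-wise KL-divergence projection onto the set of left-stochastic matrices reduces to dividing each column by its sum. A minimal standalone NumPy sketch (not part of the test suite) reproducing expected_projected_matrix:

# Sketch only: verifies that column-normalizing the test's input matrix
# yields the expected projected matrix used in the assertion above.
import numpy as np

matrix = np.array([[0.2, 0.8, 0.6], [0.1, 0.2, 1.5], [0.2, 1.0, 0.9]])
normalized = matrix / np.sum(matrix, axis=0, keepdims=True)
print(normalized)  # Matches expected_projected_matrix exactly.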
def _project_log_stochastic_matrix_wrt_kl_divergence(log_matrix):
  """Projects its argument onto the set of log-left-stochastic matrices.

  Args:
    log_matrix: 2d square tensor, the element-wise logarithm of the matrix to
      project.

  Returns:
    The 2d square tensor that results from projecting exp(`log_matrix`) onto
    the set of left-stochastic matrices w.r.t. the KL-divergence applied
    column-wise.
  """
  # For numerical reasons, make sure that the largest matrix element is zero
  # before exponentiating.
  log_matrix -= standard_ops.reduce_max(log_matrix, axis=0, keepdims=True)
  log_matrix -= standard_ops.log(
      standard_ops.reduce_sum(
          standard_ops.exp(log_matrix), axis=0, keepdims=True))
  return log_matrix
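Read in log space, the two subtractions above amount to a per-column log-softmax: removing the column maximum changes nothing after normalization but keeps the subsequent exponentiation from overflowing, and subtracting the log-sum-exp makes each column of exp(log_matrix) sum to one. A minimal NumPy sketch of the same computation, assuming only NumPy:

# Sketch only: mirrors the log-space update and checks that the columns of
# the exponentiated result sum to one.
import numpy as np

log_matrix = np.log([[0.2, 0.8, 0.6], [0.1, 0.2, 1.5], [0.2, 1.0, 0.9]])
log_matrix -= np.max(log_matrix, axis=0, keepdims=True)
log_matrix -= np.log(np.sum(np.exp(log_matrix), axis=0, keepdims=True))
print(np.sum(np.exp(log_matrix), axis=0))  # [1. 1. 1.]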
def _projection_op(self, state, name=None):
  with ops.colocate_with(state):
    # Gets the dimension of the state (num_constraints + 1). All of these
    # assertions are of things that should be impossible, since the state
    # passed into this method will have the same shape as that returned by
    # _initial_state().
    state_shape = state.get_shape()
    assert state_shape is not None
    assert state_shape.ndims == 2
    assert state_shape[0] == state_shape[1]
    dimension = state_shape.dims[0].value
    assert dimension is not None

    minimum_log_multiplier = standard_ops.log(
        self._minimum_multiplier_radius / standard_ops.to_float(dimension))

    return state_ops.assign(
        state,
        standard_ops.maximum(
            _project_log_stochastic_matrix_wrt_kl_divergence(state),
            minimum_log_multiplier),
        name=name)
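The elementwise maximum bounds every log-multiplier below by log(radius / dimension), so after exponentiation no entry of the stochastic matrix can collapse all the way to zero; the clamped columns may then sum to slightly more than one, at the cost of an approximate projection. A minimal NumPy sketch of just the clamping step, with a hypothetical radius of 1e-3 and dimension 2:

# Sketch only: minimum_multiplier_radius and the matrix values here are
# hypothetical, chosen so that the clamp actually fires on one entry.
import numpy as np

minimum_multiplier_radius = 1e-3  # Hypothetical value for illustration.
dimension = 2
minimum_log_multiplier = np.log(minimum_multiplier_radius / dimension)

# A log-left-stochastic matrix with one entry far below the floor.
log_state = np.log([[1e-6, 0.4], [1.0 - 1e-6, 0.6]])
clamped = np.maximum(log_state, minimum_log_multiplier)
print(np.exp(clamped).min())  # 5e-4, i.e. minimum_multiplier_radius / dimension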