def test_is_geq_0(self):
    """Check ``utils.is_geq_0`` against accepted and rejected inputs.

    Each case is a pair ``(flag, _input)``:

    - ``flag=True``: the call must hand back the very same object it was
      given (identity, not mere equality).
    - ``flag=False``: the call must raise ``ValueError`` with a message
      containing ``'not all >= 0'``.

    Every case runs in its own ``subTest`` so one failure does not hide
    the others.
    """
    N, lam = 100, 4
    tol = 1e-8

    poisson = rndm.poisson(lam=lam, size=N)
    # Same draw with the sign of the first half flipped: strictly positive
    # entries of that half become negative.
    pm_poisson = poisson.copy()
    pm_poisson[:N // 2] *= -1

    list_of_inputs = [
        (True, None),
        (True, np.zeros(N)),
        (True, np.ones(N)),
        (False, -np.ones(N)),
        # Values sitting exactly at -tol are expected to be accepted,
        # values at -2 * tol are expected to be rejected.
        (True, -tol * np.ones(N)),
        (False, -2 * tol * np.ones(N)),
        (True, poisson),
        (False, pm_poisson),
    ]

    for idx, (flag, _input) in enumerate(list_of_inputs):
        with self.subTest(index=idx, is_geq_0=flag):
            if flag:
                # assertIs reports both operands on failure, unlike
                # assertTrue(a is b)
                self.assertIs(utils.is_geq_0(_input, tol), _input)
            else:
                with self.assertRaises(ValueError) as context:
                    utils.is_geq_0(_input, tol)
                # Checked after the context manager exits, once the
                # exception has been captured
                self.assertIn('not all >= 0', str(context.exception))
def test_is_geq_0(self):
    """Check ``utils.is_geq_0`` on a non-negative and on a mixed-sign sample.

    A Poisson draw must come back numerically unchanged; a vector whose
    second half is a centered Gaussian draw must raise ``ValueError`` with
    a message containing ``'not all >= 0'``.
    """
    N, lam = 100, 4

    poisson = rndm.poisson(lam=lam, size=N)
    self.assertTrue(np.allclose(utils.is_geq_0(poisson), poisson))

    # First half copied from the Poisson draw, second half left Gaussian
    # (scaled by 5) so that negative entries are present.
    gaussian = 5 * rndm.randn(N)
    gaussian[:N // 2] = poisson[:N // 2]

    with self.assertRaises(ValueError) as context:
        utils.is_geq_0(gaussian)

    # assertIn shows the actual message on failure, unlike
    # assertTrue(x in y)
    self.assertIn('not all >= 0', str(context.exception))
def __init__(self, kernel_type, projection=False, **params):
    """Record the parametrization of the DPP and validate what was given.

    :param kernel_type:
        Stored unchanged in ``self.kernel_type``.
    :param projection:
        Stored unchanged in ``self.projection``; selects which validators
        are applied to the kernels and eigenvalues below.
    :type projection:
        bool, default ``False``
    :param dict params:
        Optional entries read here: ``'K'``, ``'K_eig_dec'``, ``'A_zono'``,
        ``'L'``, ``'L_eig_dec'`` and ``'L_gram_factor'``. Missing entries
        are passed to the validators as ``None``.
    """
    self.kernel_type = kernel_type
    self.projection = projection
    self.params_keys = set(params.keys())
    self.__check_args_coherence()

    # --- Sampling state ---------------------------------------------------
    self.sampling_mode = 'GS'  # Gram-Schmidt
    self.list_of_samples = []
    # Used by the k-DPP samplers
    self.size_k_dpp = 0
    self.E_poly = None  # table of elementary symmetric polynomials

    # --- Correlation kernel K: K, K_eig_vals, eig_vecs, A_zono -------------
    self.K = is_symmetric(params.get('K'))
    if self.projection:
        self.K = is_projection(self.K)

    K_vals, K_vecs = params.get('K_eig_dec', [None, None])
    check_K_vals = is_equal_to_O_or_1 if self.projection else is_in_01
    self.K_eig_vals = check_K_vals(K_vals)
    self.eig_vecs = is_orthonormal_columns(K_vecs)

    self.A_zono = is_full_row_rank(params.get('A_zono'))

    # --- Likelihood kernel L: L, L_eig_vals, eig_vecs, L_gram_factor, ------
    # --- L_dual, L_dual_eig_vals, L_dual_eig_vecs ---------------------------
    self.L = is_symmetric(params.get('L'))
    if self.projection:
        self.L = is_projection(self.L)

    L_vals, L_vecs = params.get('L_eig_dec', [None, None])
    check_L_vals = is_equal_to_O_or_1 if self.projection else is_geq_0
    self.L_eig_vals = check_L_vals(L_vals)
    # K and L share their eigenvectors: only fill them in from 'L_eig_dec'
    # when 'K_eig_dec' did not provide any.
    if self.eig_vecs is None:
        self.eig_vecs = is_orthonormal_columns(L_vecs)

    # "Dual" likelihood kernel L' = Phi Phi.T, where Phi = L_gram_factor
    gram_factor = params.get('L_gram_factor')
    self.L_gram_factor = gram_factor
    self.L_dual = None
    self.L_dual_eig_vals = None
    self.L_dual_eig_vecs = None

    if gram_factor is not None:
        n_rows, n_cols = gram_factor.shape
        if n_rows < n_cols:
            # Fewer rows than columns: the (d x d) dual kernel is the
            # smaller of the two products
            self.L_dual = gram_factor.dot(gram_factor.T)
            print('L_dual = Phi Phi.T was computed: Phi (dxN) with d<N')
        elif self.L is None:
            self.L = gram_factor.T.dot(gram_factor)
            print('L = Phi.T Phi was computed: Phi (dxN) with d>=N')
def compute_K(self, msg=False):
    """ Compute the correlation kernel :math:`\\mathbf{K}` from the original parametrization of the :class:`FiniteDPP` object.

    The kernel is stored in the :py:attr:`~FiniteDPP.K` attribute.
    Nothing is done when :py:attr:`~FiniteDPP.K` is already available.

    :param msg:
        When falsy, a header line is printed before the description of
        the route used to build the kernel. Recursive calls pass ``True``
        so that the header is printed only once.
    :type msg:
        bool, default ``False``

    .. seealso::

        :ref:`finite_dpps_relation_kernels`
    """
    # Kernel already available: nothing to compute
    if self.K is not None:
        return

    if not msg:
        print('K (correlation) kernel computed via:')

    if self.K_eig_vals is not None:
        print('- U diag(eig_K) U.T')
        # Column-wise scaling of U by the eigenvalues, then U.T
        self.K = (self.eig_vecs * self.K_eig_vals).dot(self.eig_vecs.T)

    elif self.A_zono is not None:
        steps = [
            '- K = A.T (AA.T)^-1 A, using',
            '- U = QR(A.T)',
            '- K = U U.T'
        ]
        print('\n'.join(steps))
        self.K_eig_vals = np.ones(self.A_zono.shape[0])
        self.eig_vecs, _ = la.qr(self.A_zono.T, mode='economic')
        self.K = self.eig_vecs.dot(self.eig_vecs.T)

    elif self.L_eig_vals is not None:
        print('- eig_K = eig_L/(1+eig_L)')
        self.K_eig_vals = self.L_eig_vals / (1.0 + self.L_eig_vals)
        # K_eig_vals is now set: the recursive call builds K from it
        self.compute_K(msg=True)

    elif self.L is not None:
        print('- eigendecomposition of L')
        self.L_eig_vals, self.eig_vecs = la.eigh(self.L)
        self.L_eig_vals = is_geq_0(self.L_eig_vals)
        self.compute_K(msg=True)

    else:
        # No usable representation yet: build L first, then retry
        self.compute_L(msg=True)
        self.compute_K(msg=True)
def __init__(self, kernel_type, projection=False, **params):
    """Record the parametrization of the DPP and validate what was given.

    :param kernel_type:
        Stored unchanged in ``self.kernel_type``.
    :param projection:
        Stored unchanged in ``self.projection``; selects which validators
        are applied to the kernels and eigenvalues below.
    :type projection:
        bool, default ``False``
    :param dict params:
        Optional entries read here: ``'K'``, ``'K_eig_dec'``, ``'A_zono'``,
        ``'L'``, ``'L_eig_dec'``, ``'L_gram_factor'`` and
        ``'L_eval_X_data'``. Missing entries are treated as ``None``.

    :raises ValueError:
        If ``eval_L`` is given but not callable, or if ``X_data`` is given
        but is empty or not 2-dimensional.
    """
    self.kernel_type = kernel_type
    self.projection = projection
    self.params_keys = set(params.keys())
    self.__check_args_coherence()

    # --- Sampling state ---------------------------------------------------
    self.sampling_mode = 'GS'  # Gram-Schmidt
    self.list_of_samples = []
    # Used by the k-DPP samplers
    self.size_k_dpp = 0
    self.E_poly = None  # table of elementary symmetric polynomials

    # --- Correlation kernel K: K, K_eig_vals, eig_vecs, A_zono -------------
    self.K = is_symmetric(params.get('K'))
    if self.projection:
        self.K = is_projection(self.K)

    K_vals, K_vecs = params.get('K_eig_dec', [None, None])
    check_K_vals = is_equal_to_O_or_1 if self.projection else is_in_01
    self.K_eig_vals = check_K_vals(K_vals)
    self.eig_vecs = is_orthonormal_columns(K_vecs)

    self.A_zono = is_full_row_rank(params.get('A_zono'))

    # --- Likelihood kernel L: L, L_eig_vals, eig_vecs, L_gram_factor, ------
    # --- L_dual -------------------------------------------------------------
    self.L = is_symmetric(params.get('L'))
    if self.projection:
        self.L = is_projection(self.L)

    L_vals, L_vecs = params.get('L_eig_dec', [None, None])
    check_L_vals = is_equal_to_O_or_1 if self.projection else is_geq_0
    self.L_eig_vals = check_L_vals(L_vals)
    # K and L share their eigenvectors: only fill them in from 'L_eig_dec'
    # when 'K_eig_dec' did not provide any.
    if self.eig_vecs is None:
        self.eig_vecs = is_orthonormal_columns(L_vecs)

    # "Dual" likelihood kernel L' = Phi Phi.T, where Phi = L_gram_factor
    gram_factor = params.get('L_gram_factor')
    self.L_gram_factor = gram_factor
    self.L_dual = None

    if gram_factor is not None:
        n_rows, n_cols = gram_factor.shape
        if n_rows < n_cols:
            # Fewer rows than columns: the (d x d) dual kernel is the
            # smaller of the two products
            self.L_dual = gram_factor.dot(gram_factor.T)
            print('L_dual = Phi Phi.T was computed: Phi (dxN) with d<N')
        elif self.L is None:
            self.L = gram_factor.T.dot(gram_factor)
            print('L = Phi.T Phi was computed: Phi (dxN) with d>=N')

    # --- Likelihood function representation --------------------------------
    # eval_L(X, Y) = L(X, Y)
    # eval_L(X) = L(X, X)
    self.eval_L, self.X_data = params.get('L_eval_X_data', [None, None])
    self.intermediate_sample_info = None

    if self.eval_L is not None and not callable(self.eval_L):
        raise ValueError(
            'eval_L should be a positive semi-definite kernel function')

    data = self.X_data
    if data is not None and not (data.size and data.ndim == 2):
        raise ValueError('\n'.join([
            'Wrong shape = {}'.format(data.shape),
            'X_data should be a non empty (N x d) ndarray'
        ]))
def sample_exact_k_dpp(self, size, mode='GS', **params):
    """ Sample exactly from :math:`\\operatorname{k-DPP}`. A priori the :class:`FiniteDPP <FiniteDPP>` object was instantiated by its likelihood :math:`\\mathbf{L}` kernel so that

    .. math::

        \\mathbb{P}_{\\operatorname{k-DPP}}(\\mathcal{X} = S)
            \\propto \\det \\mathbf{L}_S ~ 1_{|S|=k}

    :param size:
        size :math:`k` of the :math:`\\operatorname{k-DPP}`

    :type size:
        int

    :param mode:

        - ``projection=True``:
            - ``'GS'`` (default): Gram-Schmidt on the rows of :math:`\\mathbf{K}`.
            - ``'Schur'``: Use Schur complement to compute conditionals.

        - ``projection=False``:
            - ``'GS'`` (default): Gram-Schmidt on the rows of the eigenvectors of :math:`\\mathbf{K}` selected in Phase 1.
            - ``'GS_bis'``: Slight modification of ``'GS'``
            - ``'KuTa12'``: Algorithm 1 in :cite:`KuTa12`
            - ``'vfx'``: the dpp-vfx rejection sampler in :cite:`DeCaVa19`

    :type mode:
        string, default ``'GS'``

    :param dict params:
        Dictionary containing the parameters for exact samplers with keys

        - ``'random_state'`` (default None)
        - If ``mode='vfx'``

            See :py:meth:`~dppy.exact_sampling.k_dpp_vfx_sampler` for a full list of all parameters accepted by 'vfx' sampling. We report here the most impactful

            + ``'rls_oversample_dppvfx'`` (default 4.0) Oversampling parameter used to construct dppvfx's internal Nystrom approximation. This makes each rejection round slower and more memory intensive, but reduces variance and the number of rounds of rejections.
            + ``'rls_oversample_bless'`` (default 4.0) Oversampling parameter used during bless's internal Nystrom approximation. This makes the one-time pre-processing slower and more memory intensive, but reduces variance and the number of rounds of rejections

            Empirically, a small factor [2,10] seems to work for both parameters. It is suggested to start with a small number and increase if the algorithm fails to terminate.

    :return:
        A sample from the corresponding :math:`\\operatorname{k-DPP}`.

        In any case, the sample is appended to the :py:attr:`~FiniteDPP.list_of_samples` attribute as a list.

    :rtype:
        list

    :raises ValueError:
        If ``mode='vfx'`` is requested without the ``'L_eval_X_data'``
        representation, if ``size`` differs from the rank of a projection
        correlation kernel, or if no representation of the DPP is usable.
    :raises FloatingPointError:
        If the eigenvalues of :math:`\\mathbf{L}` have to be derived from
        those of :math:`\\mathbf{K}` and one of the latter equals 1.

    .. note::

        Each time you call this method, the sample is appended to the :py:attr:`~FiniteDPP.list_of_samples` attribute as a list.

        The :py:attr:`~FiniteDPP.list_of_samples` attribute can be emptied using :py:meth:`~FiniteDPP.flush_samples`

    .. caution::

        The underlying kernel :math:`\\mathbf{K}`, resp. :math:`\\mathbf{L}` must be real valued for now.

    .. seealso::

        - :py:meth:`~FiniteDPP.sample_exact`
        - :py:meth:`~FiniteDPP.sample_mcmc_k_dpp`
    """

    rng = check_random_state(params.get('random_state', None))

    self.sampling_mode = mode
    self.size_k_dpp = size

    if self.sampling_mode == 'vfx':
        if self.eval_L is None or self.X_data is None:
            raise ValueError(
                "The vfx sampler is currently only available for the 'L_eval_X_data' representation."
            )

        # random_state is passed explicitly below; drop it from params so
        # it is not supplied twice
        params.pop("random_state", None)
        sampl, self.intermediate_sample_info = k_dpp_vfx_sampler(
            size,
            self.intermediate_sample_info,
            self.X_data,
            self.eval_L,
            random_state=rng,
            **params)

    # If DPP defined via projection kernel
    elif self.projection:
        if self.kernel_type == 'correlation':

            if self.K_eig_vals is not None:
                rank = np.rint(np.sum(self.K_eig_vals)).astype(int)
            elif self.A_zono is not None:
                rank = self.A_zono.shape[0]
            else:  # self.K is not None
                rank = np.rint(np.trace(self.K)).astype(int)

            if size != rank:
                raise ValueError(
                    'size k={} != rank={} for projection correlation K kernel'
                    .format(size, rank))

            if self.K_eig_vals is not None:
                # K_eig_vals > 0.5 below to get indices where e_vals = 1
                sampl = proj_dpp_sampler_eig(
                    eig_vecs=self.eig_vecs[:, self.K_eig_vals > 0.5],
                    mode=self.sampling_mode,
                    size=size,
                    random_state=rng)

            elif self.A_zono is not None:
                warn(
                    'DPP defined via `A_zono`, apriori you want to use `sampl_mcmc`, but you have called `sample_exact`'
                )

                self.K_eig_vals = np.ones(rank)
                self.eig_vecs, _ = la.qr(self.A_zono.T, mode='economic')

                sampl = proj_dpp_sampler_eig(eig_vecs=self.eig_vecs,
                                             mode=self.sampling_mode,
                                             size=size,
                                             random_state=rng)

            else:
                sampl = proj_dpp_sampler_kernel(kernel=self.K,
                                                mode=self.sampling_mode,
                                                size=size,
                                                random_state=rng)

        else:  # self.kernel_type == 'likelihood':
            if self.L_eig_vals is not None:
                # L_eig_vals > 0.5 below to get indices where e_vals = 1
                sampl = proj_dpp_sampler_eig(
                    eig_vecs=self.eig_vecs[:, self.L_eig_vals > 0.5],
                    mode=self.sampling_mode,
                    size=size,
                    random_state=rng)
            else:
                self.compute_L()
                # size > rank treated internally in proj_dpp_sampler_kernel
                sampl = proj_dpp_sampler_kernel(self.L,
                                                mode=self.sampling_mode,
                                                size=size,
                                                random_state=rng)

    # If eigen decomposition of K, L or L_dual is available USE IT!
    elif self.L_eig_vals is not None:

        # Phase 1
        # Precompute elementary symmetric polynomials.
        # The table is cached across calls and rebuilt only when it was
        # built for a smaller size than the one requested now. The size it
        # was built for is tracked separately from ``size_k_dpp``, which is
        # overwritten at the top of every call and therefore cannot tell
        # whether the cached table is large enough.
        cached_size = getattr(self, '_E_poly_size', None)
        if (self.E_poly is None
                or cached_size is None
                or cached_size < size):
            self.E_poly = elementary_symmetric_polynomials(
                self.L_eig_vals, size)
            self._E_poly_size = size

        # Select eigenvectors
        V = k_dpp_eig_vecs_selector(self.L_eig_vals,
                                    self.eig_vecs,
                                    size=size,
                                    E_poly=self.E_poly,
                                    random_state=rng)

        # Phase 2
        sampl = proj_dpp_sampler_eig(V,
                                     self.sampling_mode,
                                     random_state=rng)

    elif self.K_eig_vals is not None:
        # eig_L = eig_K / (1 - eig_K): an eigenvalue of K equal to 1 is a
        # division by zero, reported as FloatingPointError. The error
        # policy is scoped to this statement so that numpy's process-wide
        # settings are left as the caller configured them.
        with np.errstate(divide='raise'):
            self.L_eig_vals = self.K_eig_vals / (1.0 - self.K_eig_vals)
        # L_eig_vals is now set: the recursive call takes the branch above
        return self.sample_exact_k_dpp(size,
                                       self.sampling_mode,
                                       random_state=rng)

    # Otherwise eigendecomposition is necessary
    elif self.L_dual is not None:
        # L_dual = Phi Phi.T = W Theta W.T
        # L = Phi.T Phi = V Gamma V.T
        # implies Gamma = Theta and V = Phi.T W Theta^{-1/2}
        self.L_eig_vals, L_dual_eig_vecs = la.eigh(self.L_dual)
        self.L_eig_vals = is_geq_0(self.L_eig_vals)
        self.eig_vecs = self.L_gram_factor.T.dot(
            L_dual_eig_vecs / np.sqrt(self.L_eig_vals))
        return self.sample_exact_k_dpp(size,
                                       mode=self.sampling_mode,
                                       random_state=rng)

    elif self.L is not None:
        self.L_eig_vals, self.eig_vecs = la.eigh(self.L)
        self.L_eig_vals = is_geq_0(self.L_eig_vals)
        return self.sample_exact_k_dpp(size,
                                       self.sampling_mode,
                                       random_state=rng)

    elif self.K is not None:
        self.K_eig_vals, self.eig_vecs = la.eigh(self.K)
        self.K_eig_vals = is_in_01(self.K_eig_vals)
        return self.sample_exact_k_dpp(size,
                                       self.sampling_mode,
                                       random_state=rng)

    elif self.eval_L is not None and self.X_data is not None:
        # In case mode!='vfx'
        self.compute_L()
        return self.sample_exact_k_dpp(size,
                                       self.sampling_mode,
                                       random_state=rng)

    else:
        raise ValueError(
            'None of the available samplers could be used based on the current DPP representation. This should never happen, please consider rasing an issue on github at https://github.com/guilgautier/DPPy/issues'
        )

    self.list_of_samples.append(sampl)
    return sampl
def sample_exact(self, mode='GS', **params):
    """ Sample exactly from the corresponding :class:`FiniteDPP <FiniteDPP>` object.

    The sampler is selected from ``mode`` first (``'Schur'``, ``'Chol'``,
    ``'vfx'``) and otherwise from the representation of the DPP that is
    currently available. When only a kernel (and no eigendecomposition) is
    available, the eigendecomposition is computed, stored on the object and
    the method calls itself again.

    :param mode:

        - ``projection=True``:
            - ``'GS'`` (default): Gram-Schmidt on the rows of :math:`\\mathbf{K}`.
            - ``'Chol'`` :cite:`Pou19` Algorithm 3
            - ``'Schur'``: when DPP defined from correlation kernel ``K``, use Schur complement to compute conditionals

        - ``projection=False``:
            - ``'GS'`` (default): Gram-Schmidt on the rows of the eigenvectors of :math:`\\mathbf{K}` selected in Phase 1.
            - ``'GS_bis'``: Slight modification of ``'GS'``
            - ``'Chol'`` :cite:`Pou19` Algorithm 1
            - ``'KuTa12'``: Algorithm 1 in :cite:`KuTa12`
            - ``'vfx'``: the dpp-vfx rejection sampler in :cite:`DeCaVa19`

    :type mode:
        string, default ``'GS'``

    :param dict params:
        Dictionary containing the parameters for exact samplers with keys

        - ``'random_state'`` (default None)
        - If ``mode='vfx'``

            See :py:meth:`~dppy.exact_sampling.dpp_vfx_sampler` for a full list of all parameters accepted by 'vfx' sampling. We report here the most impactful

            + ``'rls_oversample_dppvfx'`` (default 4.0) Oversampling parameter used to construct dppvfx's internal Nystrom approximation. This makes each rejection round slower and more memory intensive, but reduces variance and the number of rounds of rejections.
            + ``'rls_oversample_bless'`` (default 4.0) Oversampling parameter used during bless's internal Nystrom approximation. This makes the one-time pre-processing slower and more memory intensive, but reduces variance and the number of rounds of rejections

            Empirically, a small factor [2,10] seems to work for both parameters. It is suggested to start with a small number and increase if the algorithm fails to terminate.

    :return:
        Returns a sample from the corresponding :class:`FiniteDPP <FiniteDPP>` object. In any case, the sample is appended to the :py:attr:`~FiniteDPP.list_of_samples` attribute as a list.

    :rtype:
        list

    :raises ValueError:
        If ``mode='Schur'`` is requested for anything other than a
        projection correlation kernel, if ``mode='vfx'`` is requested
        without the ``'L_eval_X_data'`` representation, or if no
        representation of the DPP is usable.

    .. note::

        Each time you call this method, the sample is appended to the :py:attr:`~FiniteDPP.list_of_samples` attribute as a list.

        The :py:attr:`~FiniteDPP.list_of_samples` attribute can be emptied using :py:meth:`~FiniteDPP.flush_samples`

    .. caution::

        The underlying kernel :math:`\\mathbf{K}`, resp. :math:`\\mathbf{L}` must be real valued for now.

    .. seealso::

        - :ref:`finite_dpps_exact_sampling`
        - :py:meth:`~FiniteDPP.flush_samples`
        - :py:meth:`~FiniteDPP.sample_mcmc`
    """

    rng = check_random_state(params.get('random_state', None))

    self.sampling_mode = mode

    # --- Modes that dictate the sampler regardless of what is cached ------
    if self.sampling_mode == 'Schur':
        if self.kernel_type == 'correlation' and self.projection:
            self.compute_K()
            sampl = proj_dpp_sampler_kernel(self.K, self.sampling_mode,
                                            random_state=rng)
        else:
            err_print =\
                ['`Schur` sampling mode is only available for projection DPPs, i.e., `kernel_type="correlation"` and `projection=True`',
                 'Given: {}'.format((self.kernel_type, self.projection))]
            raise ValueError('\n'.join(err_print))

    elif self.sampling_mode == 'Chol':
        # Both Cholesky-type samplers work on the kernel K itself
        self.compute_K()
        if self.kernel_type == 'correlation' and self.projection:
            sampl = proj_dpp_sampler_kernel(self.K, self.sampling_mode,
                                            random_state=rng)
        else:
            sampl, _ = dpp_sampler_generic_kernel(self.K, random_state=rng)

    elif self.sampling_mode == 'vfx':
        if self.eval_L is None or self.X_data is None:
            raise ValueError(
                'The vfx sampler is currently only available with '
                '{"L_eval_X_data": (L_eval, X_data)} representation.')

        # random_state is passed explicitly below; drop it from params so
        # it is not supplied twice
        params.pop("random_state", None)
        sampl, self.intermediate_sample_info = dpp_vfx_sampler(
            self.intermediate_sample_info,
            self.X_data,
            self.eval_L,
            random_state=rng,
            **params)

    # --- Spectral samplers --------------------------------------------------
    # If eigen decomposition of K, L or L_dual is available USE IT!
    elif self.K_eig_vals is not None:
        # Phase 1
        if self.kernel_type == 'correlation' and self.projection:
            # K_eig_vals > 0.5 keeps the columns whose eigenvalue is 1
            V = self.eig_vecs[:, self.K_eig_vals > 0.5]
        else:
            V = dpp_eig_vecs_selector(self.K_eig_vals, self.eig_vecs,
                                      random_state=rng)
        # Phase 2
        sampl = proj_dpp_sampler_eig(V, self.sampling_mode,
                                     random_state=rng)

    # NOTE(review): disabled branch kept from the original source; it
    # refers to L_dual_eig_vals / L_dual_eig_vecs and
    # dpp_eig_vecs_selector_L_dual.
    # elif self.L_dual_eig_vals is not None:
    #     # Phase 1
    #     V = dpp_eig_vecs_selector_L_dual(self.L_dual_eig_vals,
    #                                      self.L_dual_eig_vecs,
    #                                      self.L_gram_factor,
    #                                      random_state=rng)
    #     # Phase 2
    #     sampl = proj_dpp_sampler_eig(V, self.sampling_mode,
    #                                  random_state=rng)
    #

    elif self.L_eig_vals is not None:
        # eig_K = eig_L / (1 + eig_L); K_eig_vals is now set, so the
        # recursive call takes the K_eig_vals branch above
        self.K_eig_vals = self.L_eig_vals / (1.0 + self.L_eig_vals)
        return self.sample_exact(mode=self.sampling_mode,
                                 random_state=rng)

    elif self.L_dual is not None:
        # L_dual = Phi Phi.T = W Theta W.T
        # L = Phi.T Phi = V Gamma V.T
        # implies Gamma = Theta and V = Phi.T W Theta^{-1/2}
        self.L_eig_vals, L_dual_eig_vecs = la.eigh(self.L_dual)
        self.L_eig_vals = is_geq_0(self.L_eig_vals)
        # NOTE(review): a zero eigenvalue of L_dual would make the division
        # below a division by zero -- confirm that L_gram_factor is expected
        # to have full row rank here.
        self.eig_vecs = self.L_gram_factor.T.dot(
            L_dual_eig_vecs / np.sqrt(self.L_eig_vals))
        return self.sample_exact(mode=self.sampling_mode,
                                 random_state=rng)

    # If DPP defined via projection correlation kernel K
    # no eigendecomposition required
    elif self.K is not None and self.projection:
        sampl = proj_dpp_sampler_kernel(self.K, self.sampling_mode,
                                        random_state=rng)

    # --- Only a kernel is available: eigendecompose, store, and retry ------
    elif self.K is not None:
        self.K_eig_vals, self.eig_vecs = la.eigh(self.K)
        self.K_eig_vals = is_in_01(self.K_eig_vals)
        return self.sample_exact(mode=self.sampling_mode,
                                 random_state=rng)

    elif self.L is not None:
        self.L_eig_vals, self.eig_vecs = la.eigh(self.L)
        self.L_eig_vals = is_geq_0(self.L_eig_vals)
        return self.sample_exact(mode=self.sampling_mode,
                                 random_state=rng)

    # If DPP defined through correlation kernel with parameter 'A_zono'
    # a priori you wish to use the zonotope approximate sampler
    elif self.A_zono is not None:
        warn(
            'DPP defined via `A_zono`, apriori you want to use `sample_mcmc`, but you have called `sample_exact`'
        )

        # One unit eigenvalue per row of A_zono; eigenvectors come from the
        # economic QR factorization of A_zono.T
        self.K_eig_vals = np.ones(self.A_zono.shape[0])
        self.eig_vecs, _ = la.qr(self.A_zono.T, mode='economic')
        return self.sample_exact(mode=self.sampling_mode,
                                 random_state=rng)

    elif self.eval_L is not None and self.X_data is not None:
        # mode != 'vfx' here: compute L, then retry
        self.compute_L()
        return self.sample_exact(mode=self.sampling_mode,
                                 random_state=rng)

    else:
        raise ValueError(
            'None of the available samplers could be used based on the current DPP representation. This should never happen, please consider rasing an issue on github at https://github.com/guilgautier/DPPy/issues'
        )

    # Reached only by the branches that produced a sample directly; the
    # recursive branches return the value of the inner call, which has
    # already recorded the sample.
    self.list_of_samples.append(sampl)
    return sampl