def test_pseudodet_pinv():
    # The pseudo-inverse and the pseudo-determinant computed by _PSD must
    # apply the same eigenvalue cutoff.

    # Borrow the eigenvectors of a random symmetric matrix; the eigenvalues
    # returned by eigh are discarded and replaced by hand-picked ones below.
    np.random.seed(1234)
    size = 7
    noise = np.random.randn(size, size)
    _, eigvecs = scipy.linalg.eigh(np.dot(noise, noise.T))

    # Spectrum with one large, one tiny and otherwise moderate eigenvalues.
    eigvals = np.full(size, 0.5)
    eigvals[0] = 1.0
    eigvals[-1] = 1e-7
    cov = np.dot(eigvecs, np.dot(np.diag(eigvals), eigvecs.T))

    # The cutoff is chosen so that only the smallest eigenvalue falls below it.
    cutoff = 1e-5
    psd = _PSD(cov, cond=cutoff)
    psd_of_pinv = _PSD(psd.pinv, cond=cutoff)

    # log pseudo-determinant == sum of logs of all but the smallest eigenvalue.
    expected_log_pdet = np.sum(np.log(eigvals[:-1]))
    assert_allclose(psd.log_pdet, expected_log_pdet)

    # pseudo-determinant of the pseudo-inverse == 1 / pseudo-determinant.
    assert_allclose(-psd.log_pdet, psd_of_pinv.log_pdet)
def logpdf(self, x, loc=None, scale=1, gamma=3):
    """Log of the multivariate EFF probability density function.

    Parameters
    ----------
    x : array_like
        Points at which to evaluate the log of the probability density
        function.
    loc : array_like, optional
        Centre of the distribution (default zero).
    scale : array_like, optional
        Positive definite shape matrix. This is not the distribution's
        covariance matrix (default one).
    gamma : EFF's gamma parameter (default 3).

    Returns
    -------
    logpdf : Log of the probability density function evaluated at `x`.

    Raises
    ------
    NotImplementedError
        If the dimension returned by ``_process_parameters`` is not 3.
    """
    dim, loc, scale, gamma = self._process_parameters(loc, scale, gamma)
    x = self._process_quantiles(x, dim)
    scale_info = _PSD(scale)

    if dim != 3:
        # Raise a regular exception instead of calling sys.exit(): a library
        # routine must not terminate the caller's interpreter.
        raise NotImplementedError(
            "EFF logpdf is only implemented for dimension 3 "
            "(got dimension {0})".format(dim))

    return self._logpdf3d(x, loc, scale_info.U, scale_info.log_pdet,
                          gamma, dim)
def pdf(self, x, location=None, scale=1, dof=None, allow_singular=False):
    """
    Multivariate Student's t probability density function.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
    location : ndarray
        Location of the distribution
    scale : array_like
        Scale matrix of the distribution
    dof : scalar
        Degrees-of-freedom of the distribution
    allow_singular : bool, optional
        Forwarded to `_PSD` when decomposing `scale` (default False).

    Returns
    -------
    pdf : ndarray or scalar
        Probability density function evaluated at `x`

    """
    dim, location, scale, dof = self._process_parameters(
        None, location, scale, dof)
    quantiles = self._process_quantiles(x, dim)
    scale_psd = _PSD(scale, allow_singular=allow_singular)
    # Evaluate in log space first, then exponentiate.
    log_density = self._logpdf(quantiles, location, scale_psd.U,
                               scale_psd.log_pdet, scale_psd.rank, dof)
    return _squeeze_output(np.exp(log_density))
def logpdf(self, x, loc=None, scale=1, rt=10):
    """Log of the multivariate King probability density function.

    Parameters
    ----------
    x : array_like
        Points at which to evaluate the log of the probability density
        function.
    loc : array_like, optional
        Centre of the distribution (default zero).
    scale : array_like, optional
        Positive definite shape matrix. This is not the distribution's
        covariance matrix (default one).
    rt : King's tidal radius parameter (default 10).

    Returns
    -------
    logpdf : Log of the probability density function evaluated at `x`.

    Raises
    ------
    NotImplementedError
        If the dimension returned by ``_process_parameters`` is neither
        1 nor 3.
    """
    dim, loc, scale, rt = self._process_parameters(loc, scale, rt)
    x = self._process_quantiles(x, dim)
    scale_info = _PSD(scale)

    if dim == 1:
        return self._logpdf1d(x, loc, scale_info.U, scale_info.log_pdet, rt)
    if dim == 3:
        return self._logpdf3d(x, loc, scale_info.U, scale_info.log_pdet, rt)

    # Raise a regular exception instead of calling sys.exit(): a library
    # routine must not terminate the caller's interpreter.
    raise NotImplementedError(
        "King logpdf is only implemented for dimensions 1 and 3 "
        "(got dimension {0})".format(dim))
def logpdf(self, x, mean=None, shape=1, df=1):
    """Log of the multivariate t-distribution probability density function.

    Parameters
    ----------
    x : array_like
        Points at which to evaluate the log of the probability density
        function.
    mean : array_like, optional
        Mean of the distribution (default zero).
    shape : array_like, optional
        Positive definite shape matrix. This is not the distribution's
        covariance matrix (default one).
    df : Degrees of freedom (default 1).

    Returns
    -------
    logpdf : Log of the probability density function evaluated at `x`.

    """
    dim, mean, shape, df = self._process_parameters(mean, shape, df)
    quantiles = self._process_quantiles(x, dim)
    decomposition = _PSD(shape)
    whitening = decomposition.U
    log_pdet = decomposition.log_pdet
    return self._logpdf(quantiles, mean, whitening, log_pdet, df, dim)
def test_large_pseudo_determinant():
    # Check that large pseudo-determinants are handled appropriately.

    # Construct a singular diagonal covariance matrix
    # whose pseudo determinant overflows double precision.
    large_total_log = 1000.0
    npos = 100
    nzero = 2
    large_entry = np.exp(large_total_log / npos)
    n = npos + nzero
    cov = np.zeros((n, n), dtype=float)
    np.fill_diagonal(cov, large_entry)
    cov[-nzero:, -nzero:] = 0

    # Check some determinants: the full matrix is singular, and the
    # determinant of the positive block overflows to infinity.
    assert_equal(scipy.linalg.det(cov), 0)
    assert_equal(scipy.linalg.det(cov[:npos, :npos]), np.inf)

    # This assertion used to be commented out for numpy 1.5.1 compatibility;
    # np.linalg.slogdet has been available since numpy 1.6, so it is enabled.
    assert_allclose(np.linalg.slogdet(cov[:npos, :npos]),
                    (1, large_total_log))

    # Check the pseudo-determinant.
    psd = _PSD(cov)
    assert_allclose(psd.log_pdet, large_total_log)
def pdf(self, x, mean=None, cov=1, allow_singular=False):
    """
    Multivariate laplace probability density function.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
    %(_mvl_doc_default_callparams)s

    Returns
    -------
    pdf : ndarray or scalar
        Probability density function evaluated at `x`

    Notes
    -----
    %(_mvl_doc_callparams_note)s

    """
    dim, mean, cov = self._process_parameters(None, mean, cov)
    quantiles = self._process_quantiles(x, dim)
    cov_psd = _PSD(cov, allow_singular=allow_singular)
    # Evaluate in log space first, then exponentiate.
    log_density = self._logpdf(quantiles, mean, cov_psd.U,
                               cov_psd.log_pdet, cov_psd.rank)
    return _squeeze_output(np.exp(log_density))
def cdf(self, x, mean=None, cov=1, allow_singular=False):
    """
    Multivariate laplace cumulative distribution function.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
    %(_mvl_doc_default_callparams)s

    Returns
    -------
    cdf : ndarray or scalar
        Cumulative distribution function evaluated at `x`

    Notes
    -----
    %(_mvl_doc_callparams_note)s

    .. versionadded:: 1.0.0

    """
    dim, mean, cov = self._process_parameters(None, mean, cov)
    quantiles = self._process_quantiles(x, dim)
    cov_psd = _PSD(cov, allow_singular=allow_singular)
    # Only the factor `U` of the decomposition is handed to `_cdf`.
    return _squeeze_output(self._cdf(quantiles, mean, cov_psd.U))
def _e_step(self, X):
    ''' E-step of the algorithm
    Computes the conditional probability of the model

    Parameters
    ----------
    X: array-like, shape (n, p)
        data

    Returns
    ----------
    cond_prob_matrix: array-like, shape (n, K)
         (cond_prob_matrix)_ik = P(Z_i=k|X_i=x_i)
    '''
    n, p = X.shape
    K = len(self.alpha_)

    cond_prob_matrix = np.zeros((n, K))
    for k in range(K):
        # Only the log pseudo-determinant of Sigma_[k] enters the density;
        # the previously computed (and unused) `prec_U` and `X - mu_[k]`
        # have been dropped.
        logdet = _PSD(self.Sigma_[k]).log_pdet
        # NOTE(review): the trailing `+ p` presumably stands in for the
        # tau-normalised quadratic form -- confirm against the model.
        logdensity = -0.5 * (p * np.log(2 * np.pi)
                             + p * np.log(self.tau_[:, k])
                             + logdet + p)
        cond_prob_matrix[:, k] = np.exp(logdensity) * self.alpha_[k]

    # Rows whose densities are all exactly zero fall back to the mixing
    # proportions so that the normalisation below never divides by zero.
    sum_row = np.sum(cond_prob_matrix, axis=1)
    bool_sum_zero = (sum_row == 0)
    cond_prob_matrix[bool_sum_zero, :] = self.alpha_

    # Normalise every row to sum to one.
    cond_prob_matrix /= cond_prob_matrix.sum(axis=1)[:, np.newaxis]
    return cond_prob_matrix
def __init__(self, mean=None, shape=1, df=1, seed=None):
    """Create a frozen multivariate t-distribution.

    The parameters are normalised by a `MultivariateTGenerator` built from
    `seed`; see `MultivariateTGenerator` for their meaning.
    """
    generator = MultivariateTGenerator(seed)
    self._dist = generator
    mean, shape, df = generator._process_parameters(mean, shape, df)
    # Decompose the shape matrix once, at construction time.
    self.shape_info = _PSD(shape)
    self.mean = mean
    self.shape = shape
    self.df = df
def logcdf(self, x, location=None, scale=1, dof=None, allow_singular=False,
           maxpts=None, abseps=1e-5, releps=1e-5):
    """
    Log of the multivariate Student's t cumulative distribution function.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
    location : ndarray
        Location of the distribution
    scale : array_like
        Scale matrix of the distribution
    dof : scalar
        Degrees-of-freedom of the distribution
    allow_singular : bool, optional
        Forwarded to `_PSD` when checking `scale` (default False).
    maxpts: integer, optional
        The maximum number of points to use for integration
        (default `1000000*dim`)
    abseps: float, optional
        Absolute error tolerance (default 1e-5)
    releps: float, optional
        Relative error tolerance (default 1e-5)

    Returns
    -------
    cdf : ndarray or scalar
        Log of the cumulative distribution function evaluated at `x`

    """
    dim, location, scale, dof = self._process_parameters(
        None, location, scale, dof)
    quantiles = self._process_quantiles(x, dim)

    # Constructed only to check the scale matrix; the result is discarded.
    _PSD(scale, allow_singular=allow_singular)

    # Any falsy `maxpts` (None, 0) selects the dimension-dependent default.
    point_budget = maxpts or 1000000 * dim
    probability = self._cdf(quantiles, location, scale, dof,
                            point_budget, abseps, releps)
    return np.log(probability)
def cdf(self, x, df, mean=None, cov=1, allow_singular=False, maxpts=None,
        abseps=1e-5, releps=1e-5):
    """
    Multivariate Student's T cumulative distribution function.

    Parameters
    ----------
    x : array_like
        Quantiles, with the last axis of `x` denoting the components.
    %(_mvt_doc_default_callparams)s
    maxpts: integer, optional
        The maximum number of points to use for integration
        (default `1000000*dim`)
    abseps: float, optional
        Absolute error tolerance (default 1e-5)
    releps: float, optional
        Relative error tolerance (default 1e-5)

    Returns
    -------
    cdf : ndarray or scalar
        Cumulative distribution function evaluated at `x`

    Notes
    -----
    %(_mvt_doc_callparams_note)s

    .. versionadded:: 1.0.0

    """
    dim, df, mean, cov = self._process_parameters(None, df, mean, cov)
    quantiles = self._process_quantiles(x, dim)

    # Constructed only to check the covariance matrix; result is discarded.
    _PSD(cov, allow_singular=allow_singular)

    # Any falsy `maxpts` (None, 0) selects the dimension-dependent default.
    point_budget = maxpts or 1000000 * dim
    return self._cdf(quantiles, df, mean, cov, point_budget, abseps, releps)
def __init__(self, mean=None, cov=1, allow_singular=False, seed=None,
             maxpts=None, abseps=1e-5, releps=1e-5):
    """
    Create a frozen multivariate laplace distribution.

    Parameters
    ----------
    mean : array_like, optional
        Mean of the distribution (default zero)
    cov : array_like, optional
        Covariance matrix of the distribution (default one)
    allow_singular : bool, optional
        If this flag is True then tolerate a singular
        covariance matrix (default False).
    seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional
        This parameter defines the object to use for drawing random
        variates.
        If `seed` is `None` the `~np.random.RandomState` singleton is used.
        If `seed` is an int, a new ``RandomState`` instance is used, seeded
        with seed.
        If `seed` is already a ``RandomState`` or ``Generator`` instance,
        then that object is used.
        Default is None.
    maxpts: integer, optional
        The maximum number of points to use for integration of the
        cumulative distribution function (default `1000000*dim`)
    abseps: float, optional
        Absolute error tolerance for the cumulative distribution function
        (default 1e-5)
    releps: float, optional
        Relative error tolerance for the cumulative distribution function
        (default 1e-5)

    Examples
    --------
    When called with the default parameters, this will create a 1D random
    variable with mean 0 and covariance 1:

    >>> from scipy.stats import multivariate_laplace
    >>> r = multivariate_laplace()
    >>> r.mean
    array([ 0.])
    >>> r.cov
    array([[1.]])

    """
    generator = multivariate_laplace_gen(seed)
    self._dist = generator
    self.dim, self.mean, self.cov = generator._process_parameters(
        None, mean, cov)
    # Decompose the covariance once, at construction time.
    self.cov_info = _PSD(self.cov, allow_singular=allow_singular)
    # Any falsy `maxpts` (None, 0) selects the dimension-dependent default.
    self.maxpts = maxpts if maxpts else 1000000 * self.dim
    self.abseps = abseps
    self.releps = releps
def logpdf(x, mean=None, cov=None, allow_singular=False, coef=1, psd=None,
           return_psd=False):
    """Evaluate `_logpdf` at `x`, optionally returning the decomposition.

    Parameters
    ----------
    x : ndarray
        Evaluation points; its last axis length sizes the defaults below.
    mean : array_like, optional
        Defaults to a float64 zero vector of length ``x.shape[-1]``.
    cov : array_like, optional
        Defaults to the float64 identity of size ``x.shape[-1]``.
    allow_singular : bool, optional
        Forwarded to `_PSD` when `psd` is not supplied.
    coef : optional
        Passed unchanged to `_logpdf` (default 1).
    psd : optional
        A ready-made `_PSD` object; when given, `cov` is not decomposed.
    return_psd : bool, optional
        If True, return ``(logpdf, psd)`` instead of just the log-density.

    Returns
    -------
    The squeezed output of `_logpdf`, or a ``(value, psd)`` tuple when
    `return_psd` is True.
    """
    if mean is None:
        mean = np.zeros(x.shape[-1], dtype=np.float64)
    if cov is None:
        cov = np.eye(x.shape[-1], dtype=np.float64)
    if psd is None:
        # No cached decomposition supplied: build one from `cov`.
        psd = _PSD(cov, allow_singular=allow_singular)

    raw = _logpdf(x, mean, psd.U, psd.log_pdet, psd.rank, coef)
    value = _squeeze_output(raw)
    if return_psd:
        return (value, psd)
    return value
def rvs(self, loc=None, scale=1, rt=10, size=1, random_state=None,
        min_u=1e-5):
    """Draw random samples from a multivariate King distribution.

    Parameters
    ----------
    loc : array_like, optional
        Mean of the distribution (default zero).
    scale : array_like, optional
        Positive definite shape matrix. This is not the distribution's
        covariance matrix (default one).
    rt : King's tidal radius parameter (default 10).
    size : int, optional
        Number of samples; the draws are reshaped to ``(size, dim)``
        (default 1).
    random_state : optional
        If given, it is passed through `check_random_state` and used for
        the angular part of the draw; otherwise ``self._random_state`` is
        used.
    min_u : optional
        Forwarded unchanged to the radial sampler (default 1e-5).

    Returns
    -------
    samples : `loc` plus the scaled draws.

    Raises
    ------
    NotImplementedError
        If the dimension returned by ``_process_parameters`` is neither
        1 nor 3.
    """
    if random_state is not None:
        rng = check_random_state(random_state)
    else:
        rng = self._random_state

    dim, loc, scale, rt = self._process_parameters(loc, scale, rt)

    # The decomposition itself is not used below; the call is kept because
    # constructing `_PSD` presumably validates `scale` -- confirm.
    _PSD(scale)

    # ------ Take samples from the distance -------------
    # NOTE(review): `rng` is not handed to the radial samplers, only to
    # `toCartesian` below.
    if dim == 1:
        rho = self._rvs_r1d(rt=rt, size=size, min_u=min_u)
    elif dim == 3:
        rho = self._rvs_r3d(rt=rt, size=size, min_u=min_u)
    else:
        # Raise a regular exception instead of calling sys.exit(): a library
        # routine must not terminate the caller's interpreter.
        raise NotImplementedError(
            "Dimension {0} not implemented".format(dim))

    # ------ Samples from the angles -------
    samples = toCartesian(rho, dim, random_state=rng).reshape(size, dim)

    if dim > 1:
        # NOTE(review): np.linalg.cholesky returns the lower factor L with
        # scale = L L^T. Right-multiplying row samples by L (rather than
        # L.T) yields dispersion L^T L, which equals `scale` only for a
        # diagonal L -- confirm this orientation is intended.
        chol = np.linalg.cholesky(scale)
        samples = np.dot(samples, chol)
    else:
        # NOTE(review): the 1-D branch multiplies by `scale` itself, not a
        # square root as the Cholesky branch does -- confirm.
        samples = scale * samples

    return loc + samples
def rvs(self, loc=None, scale=1, gamma=3, size=1, random_state=None):
    """Draw random samples from a multivariate EFF distribution.

    Parameters
    ----------
    loc : array_like, optional
        Mean of the distribution (default zero).
    scale : array_like, optional
        Positive definite shape matrix. This is not the distribution's
        covariance matrix (default one).
    gamma : EFF gamma parameter (default 3).
    size : int, optional
        Number of samples; the draws are reshaped to ``(size, dim)``
        (default 1).
    random_state : optional
        If given, it is passed through `check_random_state` and used for
        the angular part of the draw; otherwise ``self._random_state`` is
        used.

    Returns
    -------
    samples : `loc` plus the scaled draws.

    Raises
    ------
    NotImplementedError
        If the dimension returned by ``_process_parameters`` is neither
        1 nor 3.
    """
    if random_state is not None:
        rng = check_random_state(random_state)
    else:
        rng = self._random_state

    dim, loc, scale, gamma = self._process_parameters(loc, scale, gamma)

    # The decomposition itself is not used below; the call is kept because
    # constructing `_PSD` presumably validates `scale` -- confirm.
    _PSD(scale)

    # ------ Take samples from the distance -------------
    # NOTE(review): `rng` is not handed to the radial samplers, only to
    # `toCartesian` below.
    if dim == 1:
        rho = eff.rvs(gamma=gamma, size=size)
    elif dim == 3:
        rho = self._rvs_r3d(rc=1.0, gamma=gamma, size=size)
    else:
        # Raise a regular exception instead of calling sys.exit(): a library
        # routine must not terminate the caller's interpreter.
        raise NotImplementedError(
            "Dimension {0} not implemented".format(dim))

    # ------ Samples from the angles -------
    samples = toCartesian(rho, dim, random_state=rng).reshape(size, dim)

    if dim > 1:
        # NOTE(review): np.linalg.cholesky returns the lower factor L with
        # scale = L L^T. Right-multiplying row samples by L (rather than
        # L.T) yields dispersion L^T L, which equals `scale` only for a
        # diagonal L -- confirm this orientation is intended.
        chol = np.linalg.cholesky(scale)
        samples = np.dot(samples, chol)
    else:
        # NOTE(review): the 1-D branch multiplies by `scale` itself, not a
        # square root as the Cholesky branch does -- confirm.
        samples = samples * scale

    return loc + samples
def predict(self, Xnew, thres=None):
    """Assign each row of `Xnew` to a component and flag outliers.

    Parameters
    ----------
    Xnew : ndarray, shape (n, p)
        Points to label.
    thres : float, optional
        Tail probability; the cut-off applied to the scaled distances is
        ``chi2.ppf(1 - thres, p)``. Defaults to ``self.thres``.

    Returns
    -------
    new_labels : ndarray of str, shape (n,)
        The index of the most probable component for every row, or
        ``"-1"`` for rows flagged as outliers.
    """
    n, p = Xnew.shape

    # Inverse of every component covariance, computed once here and reused
    # in the outlier pass below (it used to be recomputed there).
    precisions = []

    cond_prob_matrix = np.zeros((n, self.K))
    for k in range(self.K):
        logdet = _PSD(self.Sigma_[k]).log_pdet
        precisions.append(np.linalg.inv(self.Sigma_[k]))
        diff = Xnew - self.mu_[k]
        maha = (np.dot(diff, precisions[k]) * diff).sum(1)
        logdensity = -0.5 * (logdet + maha)
        cond_prob_matrix[:, k] = np.exp(logdensity) * self.alpha_[k]

    # Rows whose densities are all exactly zero fall back to the mixing
    # proportions so that the normalisation below never divides by zero.
    sum_row = np.sum(cond_prob_matrix, axis=1)
    bool_sum_zero = (sum_row == 0)
    cond_prob_matrix[bool_sum_zero, :] = self.alpha_
    cond_prob_matrix /= cond_prob_matrix.sum(axis=1)[:, np.newaxis]

    new_labels = np.argmax(cond_prob_matrix, axis=1)

    outlierness = np.zeros(n, dtype=bool)
    if thres is None:
        thres = self.thres
    thres = chi2.ppf(1 - thres, p)

    for k in range(self.K):
        members = (new_labels == k)
        if not members.any():
            # Nothing assigned to this component: skipping avoids taking
            # the mean of an empty array (NaN plus a RuntimeWarning).
            continue
        diff_cluster = Xnew[members, :] - self.mu_[k]
        # Distances are rescaled by the mean squared deviation of the
        # points assigned to this component before being thresholded.
        sig_cluster = np.mean(diff_cluster * diff_cluster)
        maha_cluster = (np.dot(diff_cluster, precisions[k])
                        * diff_cluster).sum(1) / sig_cluster
        outlierness[members] = (maha_cluster > thres)

    new_labels[outlierness] = -1
    return new_labels.astype(str)
def __init__(self, mu=None, sigma=None, df=None):
    """Set up location, scale and the log normalising constant.

    Parameters
    ----------
    mu : array_like, optional
        Location vector. If omitted, a zero vector of length
        ``sigma.shape[0]`` is used.
    sigma : array_like, optional
        Scale. A 1-D input is expanded to a diagonal matrix with
        ``np.diag``. If omitted, ones of length ``len(mu)`` are used
        (i.e. the identity after expansion).
    df : scalar
        Degrees of freedom. Required despite the ``None`` default.

    Raises
    ------
    ValueError
        If both `mu` and `sigma` are omitted, since the dimension cannot
        then be inferred.
    TypeError
        If `df` is omitted.
    """
    # Fail early with a clear message: previously the first case crashed
    # with an AttributeError on ``None.shape`` and the second with an
    # opaque TypeError deep inside the normalising-constant arithmetic.
    if mu is None and sigma is None:
        raise ValueError("at least one of `mu` and `sigma` must be given")
    if df is None:
        raise TypeError("`df` (degrees of freedom) must be a number, "
                        "not None")

    if sigma is not None:
        sigma = np.asarray(sigma)
    self.mu = np.zeros(sigma.shape[0]) if mu is None else np.asarray(mu)
    if sigma is None:
        sigma = np.ones(len(mu))
    if sigma.ndim == 1:
        # 1-D scale: interpret the entries as the diagonal of the matrix.
        sigma = np.diag(sigma)
    self.dim = len(self.mu)
    self.sigma = sigma
    self.df = df

    # Use scipy stats to compute |Sigma| and (x-mu)^T Sigma^{-1} (x - mu),
    # and to estimate dimension p from rank. Formula for pdf from wikipedia
    # https://en.wikipedia.org/wiki/Multivariate_t-distribution
    from scipy.stats._multivariate import _PSD
    self._psd = _PSD(self.sigma)
    nu, p = self.df, self._psd.rank
    self._log_norm = (gammaln((nu + p) / 2)
                      - gammaln(nu / 2)
                      - p / 2 * log(pi * nu)
                      - self._psd.log_pdet / 2)
def __init__(self, loc=None, scale=1, rt=10, seed=None):
    """
    Create a frozen multivariate King distribution.

    Parameters
    ----------
    loc : array_like, optional
        Mean of the distribution (default zero).
    scale : array_like, optional
        Positive definite shape matrix. This is not the distribution's
        covariance matrix (default one).
    rt : King's tidal radius parameter (default 10).
    seed : optional
        Passed to `multivariate_king_gen` (default None).
    """
    generator = multivariate_king_gen(seed)
    self._dist = generator
    processed = generator._process_parameters(loc, scale, rt)
    self.dim, self.loc, self.scale, self.rt = processed
    # Decompose the shape matrix once, at construction time.
    self.scale_info = _PSD(self.scale)
def __init__(self, loc=None, scale=1, gamma=2, seed=None):
    """
    Create a frozen multivariate EFF distribution.

    Parameters
    ----------
    loc : array_like, optional
        Mean of the distribution (default zero).
    scale : array_like, optional
        Positive definite shape matrix. This is not the distribution's
        covariance matrix (default one).
    gamma : EFF gamma parameter (default 2).
        NOTE(review): other EFF routines seen alongside this one default
        to gamma=3 -- confirm the differing default is intended.
    seed : optional
        Passed to `multivariate_eff_gen` (default None).
    """
    generator = multivariate_eff_gen(seed)
    self._dist = generator
    processed = generator._process_parameters(loc, scale, gamma)
    self.dim, self.loc, self.scale, self.gamma = processed
    # Decompose the shape matrix once, at construction time.
    self.scale_info = _PSD(self.scale)
def __init__(self, mean=None, shape=1, df=1, seed=None):
    """
    Create a frozen multivariate t distribution.

    Parameters
    ----------
    mean : array_like, optional
        Mean of the distribution (default zero).
    shape : array_like, optional
        Positive definite shape matrix. This is not the distribution's
        covariance matrix (default one).
    df : Degrees of freedom (default 1).
    seed : optional
        Passed to `multivariate_t_gen` (default None).
    """
    generator = multivariate_t_gen(seed)
    self._dist = generator
    processed = generator._process_parameters(mean, shape, df)
    self.dim, self.mean, self.shape, self.df = processed
    # Decompose the shape matrix once, at construction time.
    self.shape_info = _PSD(self.shape)
def __init__(self, mu=None, sigma=None, df=None):
    """Set up location, scale and the log normalising constant.

    Parameters
    ----------
    mu : array_like, optional
        Location vector. If omitted, a zero vector of length
        ``sigma.shape[0]`` is used.
    sigma : array_like, optional
        Scale. A 1-D input is expanded to a diagonal matrix with
        ``np.diag``. If omitted, ones of length ``len(mu)`` are used
        (i.e. the identity after expansion).
    df : scalar
        Degrees of freedom. Required despite the ``None`` default.

    Raises
    ------
    ValueError
        If both `mu` and `sigma` are omitted, since the dimension cannot
        then be inferred.
    TypeError
        If `df` is omitted.
    """
    # Fail early with a clear message: previously the first case crashed
    # with an AttributeError on ``None.shape`` and the second with an
    # opaque TypeError deep inside the normalising-constant arithmetic.
    if mu is None and sigma is None:
        raise ValueError("at least one of `mu` and `sigma` must be given")
    if df is None:
        raise TypeError("`df` (degrees of freedom) must be a number, "
                        "not None")

    if sigma is not None:
        sigma = np.asarray(sigma)
    self.mu = np.zeros(sigma.shape[0]) if mu is None else np.asarray(mu)
    if sigma is None:
        sigma = np.ones(len(mu))
    if sigma.ndim == 1:
        # 1-D scale: interpret the entries as the diagonal of the matrix.
        sigma = np.diag(sigma)
    self.dim = len(self.mu)
    self.sigma = sigma
    self.df = df

    # Use scipy stats to compute |Sigma| and (x-mu)^T Sigma^{-1} (x - mu),
    # and to estimate dimension p from rank. Formula for pdf from wikipedia
    # https://en.wikipedia.org/wiki/Multivariate_t-distribution
    from scipy.stats._multivariate import _PSD
    self._psd = _PSD(self.sigma)
    nu, p = self.df, self._psd.rank
    self._log_norm = (gammaln((nu + p)/2)
                      - gammaln(nu/2)
                      - p/2*log(pi*nu)
                      - self._psd.log_pdet/2)
def logpdf(self, x, mean=None, shape=1, df=1):
    """Log of the multivariate t-distribution probability density function.

    Parameters
    ----------
    x : p-dimensional ndarray or n-by-p matrix.
    mean : p-dimensional ndarray.
    shape : p-by-p positive definite shape matrix. This is not the
        distribution's covariance matrix.
    df : Degrees of freedom.

    Returns
    -------
    logpdf : Log of the probability density function evaluated at `x`.

    """
    mean, shape, df = self._process_parameters(mean, shape, df)
    decomposition = _PSD(shape)
    whitening = decomposition.U
    log_pdet = decomposition.log_pdet
    # `x` is handed to `_logpdf` as given; only the parameters are processed.
    return self._logpdf(x, mean, whitening, log_pdet, df)
def pdf(x, mean, cov, out=None, x_copy=None):
    """Density at `x`, obtained by exponentiating `log_pdf` in place.

    `cov` is decomposed with ``mv._PSD`` with singular matrices disallowed;
    `out` and `x_copy` are forwarded unchanged to `log_pdf`.
    """
    decomposition = mv._PSD(cov, allow_singular=False)
    log_density = log_pdf(
        x,
        mean,
        decomposition.U,
        decomposition.log_pdet,
        decomposition.rank,
        out,
        x_copy,
    )
    # Exponentiate into the same buffer to avoid allocating a second array.
    return np.exp(log_density, out=log_density)
def __init__(self, location=None, scale=1, dof=None, allow_singular=False,
             seed=None, maxpts=None, abseps=1e-5, releps=1e-5):
    """
    Create a frozen multivariate Student's t-distribution.

    Parameters
    ----------
    location : array_like, optional
        Location of the distribution (default zero)
    scale : array_like, optional
        Scale matrix of the distribution (default one)
    dof : None or scalar, optional
        Degrees-of-freedom of the distribution (default numpy.inf)
    allow_singular : bool, optional
        If this flag is True then tolerate a singular
        scale matrix (default False).
    seed : None or int or np.random.RandomState instance, optional
        This parameter defines the RandomState object to use for drawing
        random variates.
        If None (or np.random), the global np.random state is used.
        If integer, it is used to seed the local RandomState instance
        Default is None.
    maxpts: integer, optional
        The maximum number of points to use for integration of the
        cumulative distribution function (default `1000000*dim`)
    abseps: float, optional
        Absolute error tolerance for the cumulative distribution function
        (default 1e-5)
    releps: float, optional
        Relative error tolerance for the cumulative distribution function
        (default 1e-5)

    Examples
    --------
    When called with the default parameters, this will create a 1D random
    variable with mean 0 and covariance 1:

    >>> from student_mixture import multivariate_t
    >>> r = multivariate_t()
    >>> r.location
    array([ 0.])
    >>> r.scale
    array([[1.]])
    >>> r.dof
    inf

    """
    generator = multivariate_t_gen(seed)
    self._dist = generator
    processed = generator._process_parameters(None, location, scale, dof)
    self.dim, self.location, self.scale, self.dof = processed
    # Decompose the scale matrix once, at construction time.
    self.scale_info = _PSD(self.scale, allow_singular=allow_singular)
    # Any falsy `maxpts` (None, 0) selects the dimension-dependent default.
    self.maxpts = maxpts if maxpts else 1000000 * self.dim
    self.abseps = abseps
    self.releps = releps
def __init__(self, df, mean=None, cov=1, allow_singular=False, seed=None,
             maxpts=None, abseps=1e-5, releps=1e-5):
    """
    Create a frozen multivariate Student's T distribution.

    Parameters
    ----------
    df : float
        Degrees of freedom of the distribution
    mean : array_like, optional
        Mean of the distribution (default zero)
    cov : array_like, optional
        Covariance matrix of the distribution (default one)
    allow_singular : bool, optional
        If this flag is True then tolerate a singular
        covariance matrix (default False).
    seed : None or int or np.random.RandomState instance, optional
        This parameter defines the RandomState object to use for drawing
        random variates.
        If None (or np.random), the global np.random state is used.
        If integer, it is used to seed the local RandomState instance
        Default is None.
    maxpts: integer, optional
        The maximum number of points to use for integration of the
        cumulative distribution function (default `1000000*dim`)
    abseps: float, optional
        Absolute error tolerance for the cumulative distribution function
        (default 1e-5)
    releps: float, optional
        Relative error tolerance for the cumulative distribution function
        (default 1e-5)

    Examples
    --------
    When called with the default parameters, this will create a 1D random
    variable with mean 0 and covariance 1:

    >>> from scipy.stats import multivariate_t
    >>> r = multivariate_t(3)
    >>> r.df
    3.0
    >>> r.mean
    array([ 0.])
    >>> r.cov
    array([[1.]])

    """
    generator = multivariate_t_gen(seed)
    self._dist = generator
    processed = generator._process_parameters(None, df, mean, cov)
    self.dim, self.df, self.mean, self.cov = processed
    # Decompose the covariance once, at construction time.
    self.cov_info = _PSD(self.cov, allow_singular=allow_singular)
    # Any falsy `maxpts` (None, 0) selects the dimension-dependent default.
    self.maxpts = maxpts if maxpts else 1000000 * self.dim
    self.abseps = abseps
    self.releps = releps
def __init__(self, mean, cov):
    """Store `mean` and `cov` and decompose `cov` up front.

    The decomposition is built with ``mv._PSD`` with singular matrices
    disallowed and kept on ``self.psd``.
    """
    self.mean, self.cov = mean, cov
    self.psd = mv._PSD(
        cov,
        allow_singular=False,
    )