def predict_proba(self, X, XErr, priorStar=None):
    """Return the posterior probability that each sample is a star."""
    if priorStar is not None:
        self._priorStar = priorStar
    # Per-class log-likelihoods: sum over mixture components in log space.
    logLStar = logsumexp(self.clfStar.logprob_a(X, XErr), -1)
    logLGal = logsumexp(self.clfGal.logprob_a(X, XErr), -1)
    # Bayes' rule with a binary star/galaxy prior.
    logposteriorStar = logLStar + np.log(self._priorStar)
    logposteriorGal = logLGal + np.log(1.0 - self._priorStar)
    posteriorStar = np.exp(logposteriorStar)
    posteriorGal = np.exp(logposteriorGal)
    # Normalize so that P(star) + P(galaxy) = 1.
    posteriorStar /= posteriorStar + posteriorGal
    return posteriorStar
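# A minimal, self-contained sketch of the same Bayes step on made-up
# per-class log-likelihoods (no fitted classifiers required); the array
# values below are purely illustrative.
import numpy as np

logLStar = np.array([-10.0, -3.0, -7.5])   # hypothetical log L(x | star)
logLGal = np.array([-4.0, -6.0, -7.0])     # hypothetical log L(x | galaxy)
priorStar = 0.5

logposteriorStar = logLStar + np.log(priorStar)
logposteriorGal = logLGal + np.log(1.0 - priorStar)
posteriorStar = np.exp(logposteriorStar)
posteriorGal = np.exp(logposteriorGal)
posteriorStar /= posteriorStar + posteriorGal
print(posteriorStar)   # approx. [0.0025 0.9526 0.3775]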
import numpy as np
from numpy.testing import assert_array_almost_equal

# logsumexp is assumed to be the package's own helper under test
# (a drop-in for scipy.special.logsumexp).


def test_logsumexp():
    np.random.seed(0)
    X = np.random.random((100, 100))

    for axis in (None, 0, 1):
        np_result = np.log(np.sum(np.exp(X), axis=axis))
        aML_result = logsumexp(X, axis=axis)

        assert_array_almost_equal(np_result, aML_result)
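# Why a dedicated logsumexp matters: the naive log(sum(exp(x))) overflows for
# large inputs, while the shifted log-sum-exp form stays finite. scipy's
# implementation is used here as a reference; the helper tested above should
# behave the same way.
import numpy as np
from scipy.special import logsumexp as scipy_logsumexp

x = np.array([1000.0, 1000.5, 999.0])
naive = np.log(np.sum(np.exp(x)))      # exp(1000) overflows -> inf
stable = scipy_logsumexp(x)            # finite, ~1001.1
print(naive, stable)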
def score_samples(self, X, Xerr):
    """Return the per-sample likelihood of the data under the model.

    Compute the log probability of X under the model and return the
    posterior distribution (responsibilities) of each mixture component
    for each element of X.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.
    Xerr : array_like, shape (n_samples, n_features, n_features)
        Errors on input data.

    Returns
    -------
    logprob : array_like, shape (n_samples,)
        Log probabilities of each data point in X.
    responsibilities : array_like, shape (n_samples, n_components)
        Posterior probabilities of each mixture component for each
        observation.
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = X[:, np.newaxis]
    if X.size == 0:
        return np.array([]), np.empty((0, self.n_components))
    if X.shape[1] != self.mu.shape[1]:
        raise ValueError('The shape of X is not compatible with self')

    # logprob_a returns the error-convolved log N(x | mu_k, V_k) for every
    # mixture component k. Adding log(self.alpha), the mixture weights
    # (analogous to gmm.weights_ in scikit-learn's GMM), gives the
    # per-component joint log probability, mirroring the original
    # scikit-learn line based on log_multivariate_normal_density.
    lpr = self.logprob_a(X, Xerr) + np.log(self.alpha)
    logprob = logsumexp(lpr, axis=1)
    responsibilities = np.exp(lpr - logprob[:, np.newaxis])
    return logprob, responsibilities
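# Toy check of the responsibility normalisation used above: for any matrix of
# per-component joint log probabilities, exp(lpr - logsumexp(lpr)) sums to one
# over the components. The lpr values here are arbitrary.
import numpy as np
from scipy.special import logsumexp

lpr = np.array([[-5.0, -2.0, -9.0],
                [-1.0, -1.5, -0.5]])         # shape (n_samples, n_components)
logprob = logsumexp(lpr, axis=1)
responsibilities = np.exp(lpr - logprob[:, np.newaxis])
print(responsibilities.sum(axis=1))          # [1. 1.]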
def computeStellarPosteriors(X, XErr, ngStar=None, ngGal=None, priorStar=0.5,
                             magMin=None, magMax=None, extMax=None):
    clfStar, clfGal = loadXDFits(ngStar=ngStar, ngGal=ngGal, magMin=magMin,
                                 magMax=magMax, extMax=extMax)
    logLStar = logsumexp(clfStar.logprob_a(X, XErr), -1)
    logLGal = logsumexp(clfGal.logprob_a(X, XErr), -1)
    logposteriorStar = logLStar + np.log(priorStar)
    logposteriorGal = logLGal + np.log(1.0 - priorStar)
    posteriorStar = np.exp(logposteriorStar)
    posteriorGal = np.exp(logposteriorGal)
    posteriorStar /= posteriorStar + posteriorGal
    return posteriorStar
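# Usage sketch, assuming the serialized star/galaxy XD fits that loadXDFits
# reads are available; the four-feature random data below is only a
# placeholder for real colour measurements and their covariances.
import numpy as np

n_samples, n_features = 1000, 4
X = np.random.random((n_samples, n_features))                 # e.g. colours
XErr = np.tile(np.eye(n_features) * 1e-2, (n_samples, 1, 1))  # covariances

pStar = computeStellarPosteriors(X, XErr, priorStar=0.5)
isStar = pStar > 0.9   # select high-confidence stellar candidates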
def logL(self, X, Xerr):
    """Compute the log-likelihood of data given the model

    Parameters
    ----------
    X : array_like
        data, shape = (n_samples, n_features)
    Xerr : array_like
        errors, shape = (n_samples, n_features, n_features)

    Returns
    -------
    logL : float
        log-likelihood
    """
    return np.sum(logsumexp(self.logprob_a(X, Xerr), -1))
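# Toy illustration of the quantity being summed: given per-sample,
# per-component log probabilities, the total log-likelihood is the sum over
# samples of a logsumexp over components. The matrix below is arbitrary.
import numpy as np
from scipy.special import logsumexp

logprob_a = np.array([[-3.0, -1.0],
                      [-0.5, -2.5],
                      [-4.0, -4.0]])           # (n_samples, n_components)
total = np.sum(logsumexp(logprob_a, -1))
per_sample = [np.log(np.sum(np.exp(row))) for row in logprob_a]
print(np.isclose(total, np.sum(per_sample)))   # True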
def score(self, X, Xerr):
    """Compute the score of data given the model

    Provides the mean log-likelihood of the data in X under the model,
    as a score for scikit-learn cross-validation.

    Parameters
    ----------
    X : array_like, shape = (n_samples, n_features)
        Input data.
    Xerr : array_like, shape = (n_samples, n_features, n_features)
        Error on input data.

    Returns
    -------
    score : float
        Score (mean log-likelihood).
    """
    if self.V is None or self.mu is None or self.weights is None:
        raise ValueError("Model parameters not set.")
    logprob = self.GMM.logprob_a(X, Xerr)
    logLs = logsumexp(logprob, axis=-1)
    return np.mean(logLs)
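# Hedged sketch of using score() for model selection: fit on a training split
# and compare the held-out mean log-likelihood across component counts.
# `XDEstimator` is a stand-in for whatever class defines the fit/score
# methods above, not a real import; X and Xerr are assumed to exist.
import numpy as np

rng = np.random.RandomState(0)
idx = rng.permutation(len(X))
train, valid = idx[: len(X) // 2], idx[len(X) // 2:]

for n_components in (5, 10, 20):
    model = XDEstimator(n_components=n_components)   # hypothetical estimator
    model.fit(X[train], Xerr[train])
    print(n_components, model.score(X[valid], Xerr[valid]))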