def predict(self, X):
    """Predict the labels for the data samples in X using the trained model.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.

    Returns
    -------
    labels : array, shape (n_samples,)
        Component labels.
    """
    self._check_is_fitted()
    X = _check_X(X, None, self.locations_.shape[1])
    return self._estimate_weighted_log_prob(X).argmax(axis=1)
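
# Illustrative sketch, not part of the class: assuming `mix` is a fitted
# instance of this mixture trained on 2-D data, predict() returns one
# component index per row of X:
#
#     >>> X_new = np.array([[0.1, 0.2], [3.0, 2.5]])
#     >>> mix.predict(X_new)                        # doctest: +SKIP
#     array([0, 1])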
def score_samples(self, X):
    """Compute the weighted log probabilities for each sample.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.

    Returns
    -------
    log_prob : array, shape (n_samples,)
        Log probabilities of each data point in X.
    """
    self._check_is_fitted()
    X = _check_X(X, None, self.locations_.shape[1])
    return logsumexp(self._estimate_weighted_log_prob(X), axis=1)
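
# Sketch of the quantity computed above (assumes a fitted instance `mix`):
# score_samples returns log p(x) = log sum_k w_k * t_k(x), evaluated stably
# via logsumexp over the per-component weighted log probabilities rather
# than exp/sum/log, so exponentiating recovers the mixture density:
#
#     >>> log_p = mix.score_samples(X)              # doctest: +SKIP
#     >>> density = np.exp(log_p)  # mixture density at each sample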
def predict_proba(self, X):
    """Predict posterior probability of each component given the data.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.

    Returns
    -------
    resp : array, shape (n_samples, n_components)
        Returns the probability of each mixture component in the model
        given each sample.
    """
    self._check_is_fitted()
    X = _check_X(X, None, self.locations_.shape[1])
    _, log_resp, _ = self._estimate_log_prob_resp(X)
    return np.exp(log_resp)
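
# Sketch (assumes a fitted instance `mix`): the returned responsibilities
# sum to one across components, and their argmax agrees with predict(),
# since both derive from the same weighted log probabilities:
#
#     >>> resp = mix.predict_proba(X)               # doctest: +SKIP
#     >>> np.allclose(resp.sum(axis=1), 1.0)
#     True
#     >>> np.array_equal(resp.argmax(axis=1), mix.predict(X))
#     True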
def cdf(self, X, maxpts=1e+7, abseps=1e-6, releps=1e-6):
    """Cumulative distribution function of the samples in X.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.
    maxpts : int
        The maximum number of points to use for integration
        (used when n_features > 3).
    abseps : float
        Absolute error tolerance (used when n_features > 1).
    releps : float
        Relative error tolerance (used when n_features == 2 or
        n_features == 3).

    Returns
    -------
    cdf : array, shape (n_samples,)
        Cumulative distribution function evaluated at each sample in X.
    """
    self._check_is_fitted()
    X = _check_X(X, None, self.locations_.shape[1])
    n_features = X.shape[1]

    # The mixture CDF is the weighted sum of the component CDFs.
    f = np.zeros(X.shape[0])
    for i in range(self.n_components):
        location = self.locations_[i, :]
        dof = self.dofs_[i]
        weight = self.weights_[i]
        if self.scale_type == 'full':
            scale = self.scales_[i, :, :]
        elif self.scale_type == 'tied':
            scale = self.scales_
        elif self.scale_type == 'diag':
            scale = np.diag(self.scales_[i, :])
        else:
            # Remaining case (a single variance per component): expand the
            # scalar scale to an isotropic scale matrix.
            scale = np.diag(np.array([self.scales_[i]] * n_features))
        f = f + weight * _multivariate_t_cdf(X, location, scale, dof,
                                             maxpts=maxpts, abseps=abseps,
                                             releps=releps)

    # Guard against numerical error in the integration routine pushing
    # values slightly outside [0, 1].
    return np.clip(f, 0.0, 1.0)
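
# Sanity-check sketch (assumes a fitted instance `mix`): since the result
# is a weighted average of component CDFs, it approaches 1 far into the
# upper tail of every component:
#
#     >>> d = mix.locations_.shape[1]
#     >>> mix.cdf(np.full((1, d), 1e6))             # doctest: +SKIP
#     array([1.])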
def fit_predict(self, X, y=None):
    """Estimate model parameters using X and predict the labels for X.

    The method fits the model n_init times and sets the parameters with
    which the model has the largest likelihood or lower bound. Within each
    trial, the method iterates between E-step and M-step for `max_iter`
    times until the change of likelihood or lower bound is less than
    `tol`; otherwise, a `ConvergenceWarning` is raised. After fitting, it
    predicts the most probable label for the input data points.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        List of n_features-dimensional data points. Each row
        corresponds to a single data point.
    y : Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    labels : array, shape (n_samples,)
        Component labels.
    """
    X = _check_X(X, self.n_components, ensure_min_samples=2)
    self._check_initial_parameters(X)

    # If warm_start is enabled and the model has already been fitted,
    # reuse the previous parameters and run a single initialisation.
    do_init = not (self.warm_start and hasattr(self, 'converged_'))
    n_init = self.n_init if do_init else 1

    max_lower_bound = -np.inf
    self.converged_ = False

    random_state = check_random_state(self.random_state)

    for init in range(n_init):
        self._print_verbose_msg_init_beg(init)

        if do_init:
            self._initialize_parameters(X, random_state)

        lower_bound = (-np.inf if do_init else self.lower_bound_)

        for n_iter in range(1, self.max_iter + 1):
            prev_lower_bound = lower_bound

            log_prob_norm, log_resp, gamma_priors = self._e_step(X)
            self._m_step(X, log_resp, gamma_priors)
            lower_bound = self._compute_lower_bound(
                log_resp, log_prob_norm)

            change = lower_bound - prev_lower_bound
            self._print_verbose_msg_iter_end(n_iter, change)

            if abs(change) < self.tol:
                self.converged_ = True
                break

        self._print_verbose_msg_init_end(lower_bound)

        if lower_bound > max_lower_bound:
            max_lower_bound = lower_bound
            best_params = self._get_parameters()
            best_n_iter = n_iter

    if not self.converged_:
        warnings.warn('Initialization %d did not converge. '
                      'Try different init parameters, '
                      'or increase max_iter, tol '
                      'or check for degenerate data.'
                      % (init + 1), ConvergenceWarning)

    self._set_parameters(best_params)
    self.n_iter_ = best_n_iter
    self.lower_bound_ = max_lower_bound

    # Always do a final e-step to guarantee that the labels returned by
    # fit_predict(X) are always consistent with fit(X).predict(X)
    # for any value of max_iter and tol (and any random_state).
    _, log_resp, _ = self._e_step(X)

    return log_resp.argmax(axis=1)
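
# Sketch (assumes two identically configured instances `mix_a` and `mix_b`
# sharing a fixed integer random_state): thanks to the final e-step above,
# fit_predict is consistent with fit followed by predict:
#
#     >>> labels_a = mix_a.fit_predict(X)           # doctest: +SKIP
#     >>> labels_b = mix_b.fit(X).predict(X)
#     >>> np.array_equal(labels_a, labels_b)
#     True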