Example #1
0
    def predict(self, X):
        """Assign each sample in X to a mixture component.

        The label of a sample is the index of the component whose weighted
        log probability is largest for that sample.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Data points to label, one per row. The number of columns is
            validated against the second dimension of ``locations_``.

        Returns
        -------
        labels : array, shape (n_samples,)
            Component labels.
        """
        self._check_is_fitted()
        n_features = self.locations_.shape[1]
        X = _check_X(X, None, n_features)
        weighted_log_prob = self._estimate_weighted_log_prob(X)
        return weighted_log_prob.argmax(axis=1)
Example #2
0
    def score_samples(self, X):
        """Return the log probability of each sample under the mixture.

        The per-component weighted log probabilities are combined with a
        log-sum-exp reduction across the component axis.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Data points to score, one per row. The number of columns is
            validated against the second dimension of ``locations_``.

        Returns
        -------
        log_prob : array, shape (n_samples,)
            Log probabilities of each data point in X.
        """
        self._check_is_fitted()
        n_features = self.locations_.shape[1]
        X = _check_X(X, None, n_features)

        weighted_log_prob = self._estimate_weighted_log_prob(X)
        return logsumexp(weighted_log_prob, axis=1)
Example #3
0
    def predict_proba(self, X):
        """Return the posterior probability of every component per sample.

        The log responsibilities produced by ``_estimate_log_prob_resp`` are
        exponentiated to obtain probabilities.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Data points to evaluate, one per row. The number of columns is
            validated against the second dimension of ``locations_``.

        Returns
        -------
        resp : array, shape (n_samples, n_components)
            Probability of each mixture component (state) given each
            sample.
        """
        self._check_is_fitted()
        n_features = self.locations_.shape[1]
        X = _check_X(X, None, n_features)
        # Only the middle element (the log responsibilities) is needed here.
        _log_prob_norm, log_resp, _extra = self._estimate_log_prob_resp(X)
        resp = np.exp(log_resp)
        return resp
Example #4
0
    def cdf(self, X, maxpts=1e+7, abseps=1e-6, releps=1e-6):
        """Evaluate the mixture's cumulative distribution function at X.

        The result is the sum over components of each component's weight
        times its multivariate-t CDF, clamped to the interval [0, 1].

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
            Points at which the CDF is evaluated, one per row.
        maxpts: integer
            The maximum number of points to use for integration (used when
            n_features > 3)
        abseps: float
            Absolute error tolerance (used when n_features > 1)
        releps: float
            Relative error tolerance (used when n_features == 2 or
            n_features == 3)

        Returns
        -------
        cdf : array of shape (n_samples,)
            Mixture CDF value for every row of X.
        """

        self._check_is_fitted()
        X = _check_X(X, None, self.locations_.shape[1])
        n_features = X.shape[1]

        def scale_matrix(k):
            # Build the square scale matrix of component k according to how
            # ``scales_`` is laid out for the configured ``scale_type``.
            if self.scale_type == 'full':
                return self.scales_[k, :, :]
            if self.scale_type == "tied":
                return self.scales_
            if self.scale_type == 'diag':
                return np.diag(self.scales_[k, :])
            # Any other scale type: one scalar per component, repeated on
            # the diagonal.
            return np.diag(np.array([self.scales_[k]] * n_features))

        integration_opts = dict(maxpts=maxpts, abseps=abseps, releps=releps)
        total = np.zeros(X.shape[0])

        for k in range(self.n_components):
            loc_k = self.locations_[k, :]
            dof_k = self.dofs_[k]
            weight_k = self.weights_[k]
            scale_k = scale_matrix(k)

            component_cdf = _multivariate_t_cdf(
                X, loc_k, scale_k, dof_k, **integration_opts)
            total = total + weight_k * component_cdf

        # Numerical integration error can push the sum slightly outside the
        # valid range of a CDF, so clamp it back into [0, 1].
        return np.clip(total, 0.0, 1.0)
Example #5
0
    def fit_predict(self, X, y=None):
        """Estimate model parameters using X and predict the labels for X.

        The method fits the model n_init times and sets the parameters with
        which the model has the largest likelihood or lower bound. Within each
        trial, the method iterates between E-step and M-step for `max_iter`
        times until the change of likelihood or lower bound is less than
        `tol`. If the selected trial did not reach that tolerance, a
        `ConvergenceWarning` is issued. After fitting, it predicts the most
        probable label for the input data points.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.
        y : ignored
            Not used by this method.

        Returns
        -------
        labels : array, shape (n_samples,)
            Component labels.

        Notes
        -----
        After the call, ``converged_``, ``n_iter_`` and ``lower_bound_``
        describe the selected (best) trial, i.e. the one whose parameters
        are actually kept on the estimator.
        """
        X = _check_X(X, self.n_components, ensure_min_samples=2)
        self._check_initial_parameters(X)

        # if we enable warm_start, we will have a unique initialisation
        do_init = not (self.warm_start and hasattr(self, 'converged_'))
        n_init = self.n_init if do_init else 1

        # ``np.inf`` is used instead of the ``np.infty`` alias, which was
        # removed in NumPy 2.0.
        max_lower_bound = -np.inf
        self.converged_ = False

        # Book-keeping for the best trial seen so far. ``best_params`` stays
        # None until the first trial finishes, which guarantees that a trial
        # is always selected even if its lower bound is -inf or NaN.
        best_params = None
        best_n_iter = 0
        best_init = 0
        best_converged = False

        random_state = check_random_state(self.random_state)

        for init in range(n_init):
            self._print_verbose_msg_init_beg(init)

            if do_init:
                self._initialize_parameters(X, random_state)

            lower_bound = (-np.inf if do_init else self.lower_bound_)
            converged = False
            # Defined up front so it exists even when max_iter < 1 and the
            # EM loop body never runs.
            n_iter = 0

            for n_iter in range(1, self.max_iter + 1):
                prev_lower_bound = lower_bound

                log_prob_norm, log_resp, gamma_priors = self._e_step(X)
                self._m_step(X, log_resp, gamma_priors)
                lower_bound = self._compute_lower_bound(
                    log_resp, log_prob_norm)

                change = lower_bound - prev_lower_bound
                self._print_verbose_msg_iter_end(n_iter, change)

                if abs(change) < self.tol:
                    converged = True
                    break

            # NOTE(review): the end-of-init hook is not visible here and may
            # read ``converged_``; expose this trial's own status to it. The
            # attribute is overwritten with the best trial's status below.
            self.converged_ = converged
            self._print_verbose_msg_init_end(lower_bound)

            if best_params is None or lower_bound > max_lower_bound:
                max_lower_bound = lower_bound
                best_params = self._get_parameters()
                best_n_iter = n_iter
                best_init = init
                best_converged = converged

        # Report convergence of the trial whose parameters are kept, not of
        # "any trial": a converged but worse trial must not mask an
        # unconverged best one.
        self.converged_ = best_converged

        if not self.converged_:
            warnings.warn('Initialization %d did not converge. '
                          'Try different init parameters, '
                          'or increase max_iter, tol '
                          'or check for degenerate data.'
                          % (best_init + 1), ConvergenceWarning)

        self._set_parameters(best_params)
        self.n_iter_ = best_n_iter
        self.lower_bound_ = max_lower_bound

        # Always do a final e-step to guarantee that the labels returned by
        # fit_predict(X) are always consistent with fit(X).predict(X)
        # for any value of max_iter and tol (and any random_state).
        _, log_resp, _ = self._e_step(X)

        return log_resp.argmax(axis=1)